def remove_duplicate_stracks(stracksa, stracksb):
    """Drop tracks that duplicate each other across two track lists.

    Two tracks (one from each list) are treated as duplicates when their
    IoU distance, as returned by ``matching.iou_distance``, is below 0.15.
    Of each duplicate pair, the track with the longer recorded lifetime
    (``frame_id - start_frame``) is kept; on a tie the track from
    ``stracksb`` is kept.

    Args:
        stracksa: First sequence of tracks.
        stracksb: Second sequence of tracks.

    Returns:
        tuple: ``(resa, resb)`` -- the two input lists with the duplicate
        entries removed, original order preserved.
    """
    pdist = matching.iou_distance(stracksa, stracksb)
    pairs = np.where(pdist < 0.15)

    # Sets of plain ints give O(1) membership tests in the filters below.
    dupa, dupb = set(), set()
    for p, q in zip(*pairs):
        timep = stracksa[p].frame_id - stracksa[p].start_frame
        timeq = stracksb[q].frame_id - stracksb[q].start_frame
        if timep > timeq:
            dupb.add(int(q))
        else:
            dupa.add(int(p))

    resa = [t for i, t in enumerate(stracksa) if i not in dupa]
    resb = [t for i, t in enumerate(stracksb) if i not in dupb]
    return resa, resb
def remove_duplicate_tracks(tracks_a, tracks_b):
    """Remove near-identical tracks that appear in both track lists.

    A pair (one track from each list) counts as a duplicate when its IoU
    distance from ``matching.iou_distance`` is below 0.15. The track with
    the shorter recorded time (``frame_id - start_frame``) is discarded;
    on a tie the track from ``tracks_a`` is discarded.

    Args:
        tracks_a: First sequence of tracks.
        tracks_b: Second sequence of tracks.

    Returns:
        tuple: ``(res_a, res_b)`` -- the inputs without the discarded
        duplicates, in their original order.
    """
    p_dist = matching.iou_distance(tracks_a, tracks_b)
    pairs = np.where(p_dist < 0.15)

    # Use sets of plain ints so the membership checks below are O(1).
    dup_a, dup_b = set(), set()
    for a, b in zip(*pairs):
        time_a = tracks_a[a].frame_id - tracks_a[a].start_frame
        time_b = tracks_b[b].frame_id - tracks_b[b].start_frame
        if time_a > time_b:
            dup_b.add(int(b))  # choose short record time as duplicate
        else:
            dup_a.add(int(a))

    res_a = [t for i, t in enumerate(tracks_a) if i not in dup_a]
    res_b = [t for i, t in enumerate(tracks_b) if i not in dup_b]
    return res_a, res_b
def update(self, image, tlwhs, det_scores=None):
    """Advance the tracker by one frame and return the tracks to report.

    Args:
        image: Current frame; forwarded to the classifier and to every
            track ``update`` / ``re_activate`` / ``activate`` call.
        tlwhs: One box per detection for this frame (presumably
            top-left/width/height -- confirm against ``STrack``).
        det_scores: Optional per-detection scores. Defaults to all ones.

    Returns:
        list: Activated tracks from ``self.tracked_stracks`` followed by
        lost tracks that were last seen at most 4 frames ago and whose
        classifier score is above 0.3. When no classifier is configured,
        lost tracks cannot be scored and none are reported.
    """

    def _cost_matrix(tracks, dets):
        # One row per track. np.stack() rejects an empty list, so return an
        # explicit (0, n_dets) matrix when there are no tracks.
        # NOTE(review): assumes matching.linear_assignment accepts an empty
        # cost matrix, as it must already do for iou_distance on empty lists.
        rows = [t.tracking_distance(dets) for t in tracks]
        if not rows:
            return np.zeros((0, len(dets)), dtype=float)
        return np.stack(rows, axis=0)

    self.frame_id += 1

    activated_stracks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    # Step 1. Prediction: let every tracked/lost track estimate its new
    # position (Kalman filter, per the original author's note).
    for strack in itertools.chain(self.tracked_stracks, self.lost_stracks):
        strack.predict()

    # Step 2. Scoring and selection: NMS over the detections plus the
    # candidate boxes predicted by the existing tracks.
    if det_scores is None:
        det_scores = np.ones(len(tlwhs), dtype=float)
    detections = [STrack(tlwh, score, from_det=True)
                  for tlwh, score in zip(tlwhs, det_scores)]

    if self.classifier is None:
        pred_dets = []
    else:
        self.classifier.update(image)

        n_dets = len(tlwhs)
        if self.use_tracking:
            tracks = [STrack(t.self_tracking(image), t.tracklet_score(), from_det=False)
                      for t in itertools.chain(self.tracked_stracks, self.lost_stracks)
                      if t.is_activated]
            detections.extend(tracks)
        rois = np.asarray([d.tlbr for d in detections], dtype=np.float32)

        cls_scores = self.classifier.predict(rois)
        # The np.float / np.bool aliases were removed from NumPy; the
        # builtins are the documented replacements.
        scores = np.asarray([d.score for d in detections], dtype=float)
        scores[0:n_dets] = 1.
        scores = scores * cls_scores
        # nms
        if len(detections) > 0:
            keep = nms_detections(rois, scores.reshape(-1), nms_thresh=0.3)
            mask = np.zeros(len(rois), dtype=bool)
            mask[keep] = True
            keep = np.where(mask & (scores >= self.min_det_score))[0]
            detections = [detections[i] for i in keep]
            scores = scores[keep]
            for d, score in zip(detections, scores):
                d.score = score
        pred_dets = [d for d in detections if not d.from_det]
        detections = [d for d in detections if d.from_det]

    # Step 3. Association for tracked trajectories.
    unconfirmed, tracked_stracks = [], []
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    # Rows must line up with `tracked_stracks` (activated tracks only),
    # because the match indices below index into that list.
    dists = _cost_matrix(tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.min_cls_dist)
    for itracked, idet in matches:
        tracked_stracks[itracked].update(detections[idet], self.frame_id, image)

    # matching for missing targets
    detections = [detections[i] for i in u_detection]
    dists = _cost_matrix(self.lost_stracks, detections)
    matches, _, u_detection = matching.linear_assignment(dists, thresh=self.min_cls_dist)
    for ilost, idet in matches:
        self.lost_stracks[ilost].re_activate(detections[idet], self.frame_id, image)
        refind_stracks.append(self.lost_stracks[ilost])

    # remaining tracked
    # Real detections come first; indices >= len_det are track predictions.
    len_det = len(u_detection)
    detections = [detections[i] for i in u_detection] + pred_dets
    r_tracked_stracks = [tracked_stracks[i] for i in u_track]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        r_tracked_stracks[itracked].update(detections[idet], self.frame_id, image)
    for it in u_track:
        track = r_tracked_stracks[it]
        track.mark_lost()
        lost_stracks.append(track)

    # unconfirmed: only real detections (index < len_det) are candidates.
    detections = [detections[i] for i in u_detection if i < len_det]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id, image)
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    # Step 4. Initialise new tracks from the still-unmatched detections.
    for inew in u_detection:
        track = detections[inew]
        if not track.from_det or track.score < 0.6:
            continue
        track.activate(self.kalman_filter, self.frame_id, image)
        activated_stracks.append(track)

    # Step 5. Update the tracker state.
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
    self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost]  # type: list[STrack]

    self.tracked_stracks.extend(activated_stracks)
    self.tracked_stracks.extend(refind_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.removed_stracks.extend(removed_stracks)

    # get scores of lost tracks
    if self.classifier is None:
        # Without a classifier the lost tracks cannot be scored; report none
        # instead of failing on `None.predict`.
        out_lost_stracks = []
    else:
        rois = np.asarray([t.tlbr for t in self.lost_stracks], dtype=np.float32)
        lost_cls_scores = self.classifier.predict(rois)
        out_lost_stracks = [t for i, t in enumerate(self.lost_stracks)
                            if lost_cls_scores[i] > 0.3 and self.frame_id - t.end_frame <= 4]
    output_tracked_stracks = [track for track in self.tracked_stracks if track.is_activated]

    output_stracks = output_tracked_stracks + out_lost_stracks

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([track.track_id for track in activated_stracks]))
    logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    return output_stracks
def update(self, im_blob, img0):
    """Run detection on one frame and associate the results with tracks.

    Args:
        im_blob: Network input; ``shape[2]`` and ``shape[3]`` are read as
            the input height and width.
        img0: Original image; ``shape[0]`` and ``shape[1]`` are read as
            its height and width.

    Returns:
        list: Every track in ``self.tracked_stracks`` whose
        ``is_activated`` flag is set after this frame's update.
    """
    self.frame_id += 1
    activated = []       # tracks updated or newly started in this frame
    refound = []         # lost tracks matched again in this frame
    newly_lost = []      # tracked tracks left without a detection
    newly_removed = []   # tracks dropped in this frame

    img_h, img_w = img0.shape[0], img0.shape[1]
    net_h, net_w = im_blob.shape[2], im_blob.shape[3]
    meta = {
        'c': np.array([img_w / 2., img_h / 2.], dtype=np.float32),
        's': max(float(net_w) / float(net_h) * img_h, img_w) * 1.0,
        'out_height': net_h // self.opt.down_ratio,
        'out_width': net_w // self.opt.down_ratio,
    }

    # Step 1: network forward, get detections & embeddings.
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = F.normalize(output['id'], dim=1)
        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh,
                                K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0).cpu().numpy()

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    # Keep only detections above the confidence threshold (column 4).
    confident = dets[:, 4] > self.opt.conf_thres
    dets = dets[confident]
    id_feature = id_feature[confident]

    detections = []
    if len(dets) > 0:
        detections = [
            STrack(STrack.tlbr_to_tlwh(row[:4]), row[4], feat, 30)
            for row, feat in zip(dets[:, :5], id_feature)
        ]

    # Split the current tracks into not-yet-activated and activated ones.
    unconfirmed = [t for t in self.tracked_stracks if not t.is_activated]
    tracked_stracks = [t for t in self.tracked_stracks if t.is_activated]

    # Step 2: first association, with embedding.
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Embedding distance between the pooled tracks and the new detections.
    dists = matching.embedding_distance(strack_pool, detections)
    # Predict the pooled tracks forward, then fuse motion into the cost.
    STrack.multi_predict(strack_pool)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                 detections)
    # u_track / u_detection hold the unmatched track / detection indices.
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.7)
    for i_track, i_det in matches:
        track, det = strack_pool[i_track], detections[i_det]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated.append(track)
        else:
            # The track came from the lost list: re-activate it.
            track.re_activate(det, self.frame_id, new_id=False)
            refound.append(track)

    # Step 3: second association, with IOU, over what is still unmatched.
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [strack_pool[i] for i in u_track
                         if strack_pool[i].state == TrackState.Tracked]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.5)
    for i_track, i_det in matches:
        track, det = r_tracked_stracks[i_track], detections[i_det]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated.append(track)
        else:
            # Candidates were filtered to Tracked above, so this branch is
            # not expected to run.
            track.re_activate(det, self.frame_id, new_id=False)
            refound.append(track)

    # Tracked tracks that are still unmatched become lost.
    for i_track in u_track:
        track = r_tracked_stracks[i_track]
        if track.state != TrackState.Lost:
            track.mark_lost()
            newly_lost.append(track)

    # Deal with unconfirmed tracks, usually tracks with only one beginning
    # frame: match them against the detections left over so far.
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for i_track, i_det in matches:
        unconfirmed[i_track].update(detections[i_det], self.frame_id)
        activated.append(unconfirmed[i_track])
    for i_track in u_unconfirmed:
        track = unconfirmed[i_track]
        track.mark_removed()
        newly_removed.append(track)

    # Step 4: init new stracks from detections nothing else claimed.
    for i_new in u_detection:
        track = detections[i_new]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated.append(track)

    # Step 5: update state. Lost tracks older than max_time_lost are removed.
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            newly_removed.append(track)

    still_tracked = [t for t in self.tracked_stracks
                     if t.state == TrackState.Tracked]
    still_tracked = joint_stracks(still_tracked, activated)
    self.tracked_stracks = joint_stracks(still_tracked, refound)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(newly_lost)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(newly_removed)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    output_stracks = [t for t in self.tracked_stracks if t.is_activated]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([t.track_id for t in activated]))
    logger.debug('Refind: {}'.format([t.track_id for t in refound]))
    logger.debug('Lost: {}'.format([t.track_id for t in newly_lost]))
    logger.debug('Removed: {}'.format([t.track_id for t in newly_removed]))

    return output_stracks
def update_tracking(self, img, img0):
    """Update the per-class tracking state with one frame.

    :param img: network input batch; its shape is unpacked as
        ``(b, c, h, w)`` and the batch size is asserted to be 1
    :param img0: original image; its ``shape`` is passed to
        ``scale_coords`` to map boxes back to the original size
    :return: dict mapping class id to the list of activated tracks of that
        class, or ``None`` when NMS yields no detections
    """
    # update frame id
    self.frame_id += 1

    output_stracks_dict = defaultdict(list)

    # ----- do detection and reid feature extraction
    with torch.no_grad():
        # only the aggregated prediction is used, not the original output
        pred, pred_orig, reid_feat_map = self.model.forward(
            img, augment=self.opt.augment)
        pred = pred.float()

        # L2 normalize feature map
        reid_feat_map = F.normalize(reid_feat_map, dim=1)

        b, reid_dim, h_id_map, w_id_map = reid_feat_map.shape
        assert b == 1  # make sure batch size is 1

        # apply NMS
        pred = non_max_suppression(pred,
                                   self.opt.conf_thres,
                                   self.opt.iou_thres,
                                   merge=False,
                                   classes=self.opt.classes,
                                   agnostic=self.opt.agnostic_nms)
        dets = pred[0]  # assume batch_size == 1 here

        if dets is None:
            print('[Warning]: no objects detected.')
            return None

        # Collect one reid feature vector per detection, grouped by class.
        b, c, h, w = img.shape  # net input img size
        x_scale = float(w_id_map) / float(w)
        y_scale = float(h_id_map) / float(h)
        id_vects_dict = defaultdict(list)
        for det in dets:
            x1, y1, x2, y2, conf, cls_id = det

            # Map the box centre from net-input scale to feature-map scale,
            # round to the nearest cell (+0.5, then truncate) and clamp so
            # the index stays inside the feature map.
            col = ((x1 + x2) * 0.5 * x_scale + 0.5).long()
            row = ((y1 + y2) * 0.5 * y_scale + 0.5).long()
            col.clamp_(0, w_id_map - 1)
            row.clamp_(0, h_id_map - 1)

            feat = reid_feat_map[0, :, row, col].squeeze().cpu().numpy()
            id_vects_dict[int(cls_id)].append(feat)  # key: cls_id

        # Rescale boxes from net input size to original image size
        dets[:, :4] = scale_coords(img.shape[2:], dets[:, :4],
                                   img0.shape).round()

    # Process each object class independently
    for cls_id in range(self.opt.num_classes):
        # Per-class tracking records for this frame.
        activated, refound, newly_lost, newly_removed = [], [], [], []

        cls_inds = torch.where(dets[:, -1] == cls_id)
        cls_dets = dets[cls_inds].detach().cpu().numpy()
        cls_id_feature = np.array(id_vects_dict[cls_id])

        cls_detections = []
        if len(cls_dets) > 0:
            # Each row supplies tlbr (first four values) and the score.
            cls_detections = [
                STrack(STrack.tlbr_to_tlwh(row[:4]), row[4], feat,
                       buff_size=30)
                for row, feat in zip(cls_dets[:, :5], cls_id_feature)
            ]

        # reset the track ids for a different object class: for the first
        # frame
        if self.frame_id == 0:
            for track in cls_detections:
                track.reset_track_id()

        # Split this class's tracks into unconfirmed and activated ones.
        unconfirmed, tracked = [], []
        for track in self.tracked_stracks_dict[cls_id]:
            (tracked if track.is_activated else unconfirmed).append(track)

        # Step 2: first association, with embedding
        strack_pool = joint_stracks(tracked, self.lost_stracks_dict[cls_id])

        # Predict the current location with KF
        STrack.multi_predict(strack_pool)
        dists = matching.embedding_distance(strack_pool, cls_detections)
        dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                     cls_detections)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=0.7)
        for i_tracked, i_det in matches:
            track, det = strack_pool[i_tracked], cls_detections[i_det]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refound.append(track)

        # Step 3: second association, with IOU
        cls_detections = [cls_detections[i] for i in u_detection]
        r_tracked_stracks = [strack_pool[i] for i in u_track
                             if strack_pool[i].state == TrackState.Tracked]
        dists = matching.iou_distance(r_tracked_stracks, cls_detections)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=0.5)
        for i_tracked, i_det in matches:
            track, det = r_tracked_stracks[i_tracked], cls_detections[i_det]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refound.append(track)

        for i_tracked in u_track:
            track = r_tracked_stracks[i_tracked]
            if track.state != TrackState.Lost:
                track.mark_lost()
                newly_lost.append(track)

        # Deal with unconfirmed tracks, usually tracks with only one
        # beginning frame
        cls_detections = [cls_detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, cls_detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(
            dists, thresh=0.7)
        for i_tracked, i_det in matches:
            unconfirmed[i_tracked].update(cls_detections[i_det],
                                          self.frame_id)
            activated.append(unconfirmed[i_tracked])
        for i_tracked in u_unconfirmed:
            track = unconfirmed[i_tracked]
            track.mark_removed()
            newly_removed.append(track)

        # Step 4: init new stracks
        for i_new in u_detection:
            track = cls_detections[i_new]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated.append(track)

        # Step 5: update state
        for track in self.lost_stracks_dict[cls_id]:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                newly_removed.append(track)

        still_tracked = [t for t in self.tracked_stracks_dict[cls_id]
                         if t.state == TrackState.Tracked]
        still_tracked = joint_stracks(still_tracked, activated)
        self.tracked_stracks_dict[cls_id] = joint_stracks(still_tracked,
                                                          refound)
        self.lost_stracks_dict[cls_id] = sub_stracks(
            self.lost_stracks_dict[cls_id],
            self.tracked_stracks_dict[cls_id])
        self.lost_stracks_dict[cls_id].extend(newly_lost)
        self.lost_stracks_dict[cls_id] = sub_stracks(
            self.lost_stracks_dict[cls_id],
            self.removed_stracks_dict[cls_id])
        self.removed_stracks_dict[cls_id].extend(newly_removed)
        (self.tracked_stracks_dict[cls_id],
         self.lost_stracks_dict[cls_id]) = remove_duplicate_stracks(
            self.tracked_stracks_dict[cls_id],
            self.lost_stracks_dict[cls_id])

        # Only activated tracks are reported for this class.
        output_stracks_dict[cls_id] = [
            t for t in self.tracked_stracks_dict[cls_id] if t.is_activated
        ]

    return output_stracks_dict
def update(self, im_blob, img0):
    """
    Processes the image frame and finds bounding boxes (detections).

    Associates the detections with the corresponding tracklets and also
    handles lost, removed, refound and active tracklets.

    Parameters
    ----------
    im_blob : tensor
        Network input for the current frame, passed straight to
        ``self.model``.
    img0 : ndarray
        Original image; its ``shape`` is passed to ``scale_coords`` to
        rescale the detection boxes.

    Returns
    -------
    output_stracks : list of STrack
        The tracks in ``self.tracked_stracks`` whose ``is_activated`` flag
        is set after processing this frame.
    """
    self.frame_id += 1
    activated_starcks = []  # tracks updated or started in the current frame
    refind_stracks = []     # lost tracks matched again in the current frame
    lost_stracks = []       # tracked tracks without a detection this frame
    removed_stracks = []    # tracks dropped in the current frame

    # Step 1: Network forward, get detections & embeddings
    with torch.no_grad():
        pred = self.model(im_blob)
    # Reject proposals whose object confidence (index 4) is too low.
    pred = pred[pred[:, :, 4] > self.opt.conf_thres]
    if len(pred) > 0:
        dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres,
                                   self.opt.nms_thres)[0].cpu()
        # NOTE(review): the return value (and therefore the .round()) is
        # discarded, so this relies on scale_coords rescaling dets[:, :4]
        # in place -- confirm; kept unchanged to preserve current results.
        scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
        # Columns 0-3 are the box (tlbr), column 4 the score, and columns
        # 6 onward are used as the embedding feature.
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f.numpy(), 30)
            for (tlbrs, f) in zip(dets[:, :5], dets[:, 6:])
        ]
    else:
        detections = []

    # Split the current tracks: not-yet-activated ones go to `unconfirmed`,
    # activated ones to the local `tracked_stracks`.
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    # Step 2: First association, with embedding
    # Pool the activated tracks with the lost tracks.
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with KF
    STrack.multi_predict(strack_pool, self.kalman_filter)

    dists = matching.embedding_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                 detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.7)
    for itracked, idet in matches:
        # itracked indexes strack_pool, idet indexes detections.
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            # A non-tracked (lost) track found its detection again.
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    # Step 3: Second association, with IOU
    # Only unmatched detections and unmatched, still-Tracked tracks remain.
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [strack_pool[i] for i in u_track
                         if strack_pool[i].state == TrackState.Tracked]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.5)
    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    # Tracks with no detection after both passes are marked lost.
    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    # Deal with unconfirmed tracks, usually tracks with only one beginning
    # frame
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])

    # Unconfirmed tracks that are still unmatched are removed.
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    # Step 4: Init new stracks from detections nothing else claimed.
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    # Step 5: Update state
    # Tracks lost for more than max_time_lost frames are removed.
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    # Fold this frame's results into the persistent track lists.
    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]
    return output_stracks
def update(self, im_blob, img0):
    """Detect objects in one frame and update all track lists.

    Args:
        im_blob: Network input; ``shape[2]`` / ``shape[3]`` are used as the
            input height / width.
        img0: Original image; ``shape[0]`` / ``shape[1]`` are used as its
            height / width.

    Returns:
        list: The tracks in ``self.tracked_stracks`` that are activated
        once this frame has been processed.
    """
    self.frame_id += 1
    frame_activated = []
    frame_refound = []
    frame_lost = []
    frame_removed = []

    height, width = img0.shape[0], img0.shape[1]
    inp_height, inp_width = im_blob.shape[2], im_blob.shape[3]
    center = np.array([width / 2., height / 2.], dtype=np.float32)
    scale = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': center,
        's': scale,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio,
    }

    # Step 1: network forward, get detections & embeddings.
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = output['reg'] if self.opt.reg_offset else None
        id_feature = F.normalize(output['id'], dim=1)

        dets, inds = mot_decode(hm,
                                wh,
                                reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh,
                                K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    dets = self.merge_outputs([self.post_process(dets, meta)])[1]

    # Column 4 is compared against the confidence threshold; detections
    # and their embeddings are filtered with the same mask.
    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets, id_feature = dets[remain_inds], id_feature[remain_inds]

    # Wrap each surviving detection and its embedding in a fresh STrack.
    if len(dets) > 0:
        detections = [
            STrack(STrack.tlbr_to_tlwh(box[:4]), box[4], emb, 30)
            for box, emb in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    # Separate not-yet-activated tracks from activated ones.
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for candidate in self.tracked_stracks:
        bucket = tracked_stracks if candidate.is_activated else unconfirmed
        bucket.append(candidate)

    # Step 2: first association, with embedding.
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with KF
    STrack.multi_predict(strack_pool)
    cost = matching.embedding_distance(strack_pool, detections)
    cost = matching.fuse_motion(self.kalman_filter, cost, strack_pool,
                                detections)
    matches, u_track, u_detection = matching.linear_assignment(cost,
                                                               thresh=0.7)
    for pool_idx, det_idx in matches:
        matched = strack_pool[pool_idx]
        if matched.state == TrackState.Tracked:
            matched.update(detections[det_idx], self.frame_id)
            frame_activated.append(matched)
        else:
            matched.re_activate(detections[det_idx], self.frame_id,
                                new_id=False)
            frame_refound.append(matched)

    # Step 3: second association, with IOU.
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    cost = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(cost,
                                                               thresh=0.5)
    for trk_idx, det_idx in matches:
        matched = r_tracked_stracks[trk_idx]
        if matched.state == TrackState.Tracked:
            matched.update(detections[det_idx], self.frame_id)
            frame_activated.append(matched)
        else:
            matched.re_activate(detections[det_idx], self.frame_id,
                                new_id=False)
            frame_refound.append(matched)

    for trk_idx in u_track:
        unmatched = r_tracked_stracks[trk_idx]
        if unmatched.state != TrackState.Lost:
            unmatched.mark_lost()
            frame_lost.append(unmatched)

    # Deal with unconfirmed tracks, usually tracks with only one beginning
    # frame.
    detections = [detections[i] for i in u_detection]
    cost = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        cost, thresh=0.7)
    for trk_idx, det_idx in matches:
        unconfirmed[trk_idx].update(detections[det_idx], self.frame_id)
        frame_activated.append(unconfirmed[trk_idx])
    for trk_idx in u_unconfirmed:
        dropped = unconfirmed[trk_idx]
        dropped.mark_removed()
        frame_removed.append(dropped)

    # Step 4: init new stracks.
    for det_idx in u_detection:
        fresh = detections[det_idx]
        if fresh.score >= self.det_thresh:
            fresh.activate(self.kalman_filter, self.frame_id)
            frame_activated.append(fresh)

    # Step 5: update state.
    for stale in self.lost_stracks:
        if self.frame_id - stale.end_frame > self.max_time_lost:
            stale.mark_removed()
            frame_removed.append(stale)

    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         frame_activated)
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         frame_refound)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(frame_lost)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(frame_removed)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    output_stracks = [t for t in self.tracked_stracks if t.is_activated]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format(
        [t.track_id for t in frame_activated]))
    logger.debug('Refind: {}'.format([t.track_id for t in frame_refound]))
    logger.debug('Lost: {}'.format([t.track_id for t in frame_lost]))
    logger.debug('Removed: {}'.format([t.track_id for t in frame_removed]))

    return output_stracks
def update(self, detection, img0):
    """Advance the tracker by one frame using externally supplied boxes.

    Args:
        detection: iterable of ``(x, y, w, h)`` boxes. They are converted to
            corner form and multiplied by the width/height of ``img0``, so
            they are presumably centre/size values normalised to [0, 1] --
            TODO confirm against the caller.
        img0: current frame; ``img0.shape[0]`` is used as the height and
            ``img0.shape[1]`` as the width.

    Returns:
        ``(output_stracks, detections_plot)``: the activated tracks held in
        ``self.tracked_stracks`` after this frame, and a copy of the list of
        all detections built for this frame.
    """
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    width = img0.shape[1]
    height = img0.shape[0]

    # Step 1: build pixel-space corner boxes with a constant score of 1.
    boxes = []
    for x, y, w, h in detection:
        x1, y1, x2, y2 = x - w / 2, y - h / 2, x + w / 2, y + h / 2
        boxes.append([x1 * width, y1 * height, x2 * width, y2 * height, 1])
    # reshape keeps the array 2-D even when there are no detections, so the
    # row iteration below never fails on an empty frame.
    dets = np.array(boxes).reshape(-1, 5)

    if self.opt.use_hog_reid:
        features = []
        for box in dets:
            try:
                x1 = max(int(box[0]), 0)
                y1 = max(int(box[1]), 0)
                x2 = min(int(box[2]), width - 1)
                y2 = min(int(box[3]), height - 1)
                crop = cv2.resize(img0[y1:y2, x1:x2],
                                  (self.re_im_w, self.re_im_h))
                features.append(self.reid_model.compute(crop)[:, 0])
            except Exception:
                # Best effort: a crop that cannot be described (e.g. an empty
                # region) gets a zero feature, filled in below once the
                # feature shape is known.
                features.append(None)
        template = next((f for f in features if f is not None), None)
        # With no successful feature at all, fall back to the same length-1
        # zero vector the non-reid branch uses.
        id_feature = [
            f if f is not None
            else (np.zeros(1) if template is None else np.zeros_like(template))
            for f in features
        ]
    else:
        id_feature = np.zeros((len(dets), 1))

    # Optional camera-motion compensation between the previous and current
    # frame.
    warp_mode = cv2.MOTION_TRANSLATION
    if self.prev_img is not None and self.opt.use_cam_motion == True:
        warp_matrix = self.get_warp_matrix(self.prev_img,
                                           img0.copy(),
                                           warp_mode,
                                           resize_factor=4)
    else:
        warp_matrix = None
    if self.opt.use_cam_motion:
        self.prev_img = img0

    # Empty ``dets`` yields an empty list here.
    detections = [
        STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
        for tlbrs, f in zip(dets, id_feature)
    ]
    detections_plot = detections.copy()

    # Split existing tracks into not-yet-activated and activated ones.
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    # Step 2: first association, with embedding (and optionally motion).
    strack_pool, lost_map_tracks = joint_stracks(tracked_stracks,
                                                 self.lost_stracks)
    # Predict the current location with the Kalman filter.
    STrack.multi_predict(strack_pool, warp_matrix, warp_mode)
    if self.opt.use_hog_reid:
        if self.opt.use_reranking:
            dists = matching.reranking_embeding_distance(
                strack_pool, detections)
        else:
            dists = matching.embedding_distance(strack_pool, detections)
    else:
        dists = np.zeros(shape=(len(strack_pool), len(detections)))

    # NOTE(review): the source text is whitespace-collapsed; the ``else``
    # below is read as belonging to ``if self.opt.use_kalman`` (the for-else
    # reading would discard every match) -- confirm against the original.
    if self.opt.use_kalman:
        dists = matching.fuse_motion(self.opt,
                                     self.kalman_filter,
                                     dists,
                                     strack_pool,
                                     detections,
                                     lost_map=lost_map_tracks,
                                     lambda_=0.99)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=0.7)  # 0.6
        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)
    else:
        u_detection = range(len(detections))
        u_track = range(len(strack_pool))
    # ``u_track`` indexes ``strack_pool`` unless the IOU step replaces both.
    r_stracks = strack_pool

    # Step 3: second association, with IOU.
    if self.opt.use_iou:
        detections = [detections[i] for i in u_detection]
        if self.opt.use_kalman:
            r_stracks = [
                strack_pool[i] for i in u_track
                if strack_pool[i].state == TrackState.Tracked
            ]
        else:
            r_stracks = [strack_pool[i] for i in u_track]
        dists = matching.iou_distance(r_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=0.9)  # 0.7
        for itracked, idet in matches:
            track = r_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

    # Remaining unmatched tracks become lost.
    for it in u_track:
        track = r_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    # Deal with unconfirmed tracks, usually tracks with only one beginning
    # frame.
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    # Step 4: init new tracks from every detection still unmatched.
    for inew in u_detection:
        track = detections[inew]
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    # Step 5: update state -- expire lost tracks.
    cam_veloc_weight = 0.95
    for track in self.lost_stracks:
        if self.opt.use_dynamic_retrack:
            # Shrink the allowed lost time as camera and track motion grow.
            cam_motion = STrack.get_camera_intension(warp_matrix, warp_mode)
            track_vtlwh = np.array(STrack.xyah_to_tlwh(track.mean[4:]))
            track_vtlbr = STrack.tlwh_to_tlbr(track_vtlwh)
            veloc_motion = np.sqrt(np.sum(track_vtlbr**2))
            max_time_lost = self.max_time_lost * 3.2 * np.exp(
                -(cam_veloc_weight * cam_motion +
                  (1 - cam_veloc_weight) * veloc_motion))
        else:
            max_time_lost = self.max_time_lost
        if self.frame_id - track.end_frame > max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)
        # Remove tracklets that stay out of the screen for too long.
        elif (track.tlwh[0] + track.tlwh[2] // 2 > width
              or track.tlwh[1] + track.tlwh[3] // 2 > height):
            track.num_out_frame += 1
            if track.num_out_frame > STrack.out_of_frame_patience:
                track.mark_removed()
                removed_stracks.append(track)

    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks, _ = joint_stracks(self.tracked_stracks,
                                            activated_starcks)
    self.tracked_stracks, _ = joint_stracks(self.tracked_stracks,
                                            refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    print('===========Frame {}=========='.format(self.frame_id))
    print('Activated: {}'.format(
        [track.track_id for track in activated_starcks]))
    print('Refind: {}'.format([track.track_id for track in refind_stracks]))
    print('Lost: {}'.format(
        [track.track_id for track in self.lost_stracks]))
    print('Removed: {}'.format(
        [track.track_id for track in self.removed_stracks]))

    return output_stracks, detections_plot
def update(self, im_blob, img0):
    """Advance the tracker by one frame, rescoring detections with motion.

    Existing tracks are predicted first; the raw network predictions are
    then rescored with ``matching.iou_motion`` against those predictions
    before non-maximum suppression, and finally associated with the tracks.

    Args:
        im_blob: network input passed to ``self.model``.
        img0: original frame; its ``shape`` is used to rescale box
            coordinates.

    Returns:
        The activated tracks held in ``self.tracked_stracks`` after this
        frame.

    Side effects:
        Overwrites ``self.opt.conf_thres`` and ``self.opt.nms_thres`` with
        0.3 and 0.8 on every call.
    """
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    # Split existing tracks into not-yet-activated and activated ones.
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    # Step 2 (part 1): the candidate pool is activated + lost tracks; the
    # unconfirmed tracks are handled separately further down.
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with the Kalman filter.
    STrack.multi_predict(strack_pool)
    print("# strack_pool", len(strack_pool))
    sys.stdout.flush()

    # Step 1: network forward, get detections & embeddings.
    self.opt.conf_thres = 0.3
    self.opt.nms_thres = 0.8
    with torch.no_grad():
        pred = self.model(im_blob)
    print("# real dets:", len(pred))
    sys.stdout.flush()
    # First pass: drop predictions whose confidence is below the threshold.
    pred = pred[pred[:, :, 4] > self.opt.conf_thres]
    print("# 1-pass filter dets:", len(pred))
    sys.stdout.flush()

    if len(pred) > 0:
        dets = pred
        # Motion term of every raw prediction w.r.t. the predicted tracks.
        motion_dists = matching.iou_motion(strack_pool, dets)
        # Boost the confidence column with the motion term. The tensor is
        # moved to the device ``dets`` already lives on rather than forcing
        # CUDA, so this also works on CPU or a non-default GPU.
        dets[:, 4] = dets[:, 4] + 2.0 * torch.from_numpy(motion_dists).to(
            dets.device)
        dets = non_max_suppression(dets.unsqueeze(0), self.opt.conf_thres,
                                   self.opt.nms_thres)[0]
        # NOTE(review): the result of ``.round()`` is discarded; this only
        # has an effect if ``scale_coords`` rescales ``dets`` in place --
        # confirm.
        scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
        dets, embs = dets[:, :5].cpu().numpy(), dets[:, 6:].cpu().numpy()
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets, embs)
        ]
    else:
        detections = []

    # Step 2 (part 2): first association, embedding distance fused with
    # motion.
    dists = matching.embedding_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                 detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.7)
    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    # Step 3: second association, with IOU, between the detections left
    # over and the tracks that were tracked before but unmatched above.
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.5)
    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    # Deal with unconfirmed tracks, usually tracks with only one beginning
    # frame.
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    # Step 4: init new tracks from confident unmatched detections.
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    # Step 5: update state.
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format(
        [track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format(
        [track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format(
        [track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format(
        [track.track_id for track in removed_stracks]))

    return output_stracks
def update(self, im_blob, img0):
    """Advance the tracker by one frame (optionally via a relation model).

    When ``self.model`` has a ``relation`` attribute, the detection heads of
    ``self.model.backend`` are applied to the raw output, and, if the
    relation module has a ``loss`` attribute, the current features are
    appended to its ``feature_bank`` under ``relation.lock``. Otherwise the
    last output of ``self.model`` is used directly.

    Args:
        im_blob: network input; ``shape[2]`` and ``shape[3]`` are read as
            the input height and width.
        img0: original frame; ``shape[0]`` and ``shape[1]`` are read as its
            height and width.

    Returns:
        The activated tracks held in ``self.tracked_stracks`` after this
        frame.

    Side effects:
        Appends ``(dets, id_feature)`` to ``self.inputs_embs`` on every call.
    """
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio
    }

    # Step 1: network forward, get detections & embeddings.
    with torch.no_grad():
        if hasattr(self.model, 'relation'):
            outputs, stuff = self.model(im_blob)
            det_heads = {'wh', 'hm', 'reg'}
            for head in (set(self.model.backend.heads) & det_heads):
                outputs[head] = getattr(self.model.backend,
                                        head)(outputs['raw'])
            del outputs['raw']
            output = outputs
            if hasattr(self.model.relation, 'loss'):
                cur_feats = stuff[-2]
                relation = self.model.relation
                relation.lock.acquire()
                try:
                    relation.feature_bank.append(cur_feats.detach().cpu())
                finally:
                    # Always release, even if the append raises, so other
                    # users of the lock are not blocked forever.
                    relation.lock.release()
        else:
            output = self.model(im_blob)[-1]

        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)

        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm,
                                wh,
                                reg=reg,
                                ltrb=self.opt.ltrb,
                                K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    # Keep only detections above the confidence threshold, together with
    # their embeddings.
    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]
    # NOTE(review): this list grows by one entry per frame and is never
    # trimmed here -- confirm that a consumer clears it.
    self.inputs_embs.append((dets, id_feature))

    if len(dets) > 0:
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    # Split existing tracks into not-yet-activated and activated ones.
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    # Step 2: first association, with embedding fused with motion.
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with the Kalman filter.
    STrack.multi_predict(strack_pool)
    dists = matching.embedding_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                 detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.4)
    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    # Step 3: second association, with IOU.
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.5)
    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    # Deal with unconfirmed tracks, usually tracks with only one beginning
    # frame.
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    # Step 4: init new tracks from confident unmatched detections.
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    # Step 5: update state.
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format(
        [track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format(
        [track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format(
        [track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format(
        [track.track_id for track in removed_stracks]))

    return output_stracks
def update(self, im_blob, img0):
    """Advance the tracker by one frame using ``PostProcess`` detections.

    Args:
        im_blob: network input passed to ``self.model``.
        img0: original frame; its ``shape`` is handed to the post-processor.

    Returns:
        The activated tracks held in ``self.tracked_stracks`` after this
        frame.
    """
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    # Step 1: network forward, get detections & embeddings.
    with torch.no_grad():
        output = self.model(im_blob)
        postprocess = PostProcess()
        dets = postprocess(output, img0.shape, 'val')
        indices = dets[0]['topk_index']
        # Pick the embeddings that belong to the kept top-k predictions.
        id_feature = torch.index_select(output['id'], dim=1, index=indices)
        id_feature = F.normalize(id_feature, dim=1)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

        scores = dets[0]['scores']
        bboxes = dets[0]['boxes']
        # Append the score as a fifth column. ``reshape`` replaces the
        # deprecated non-in-place ``Tensor.resize``.
        dets = torch.cat([bboxes, scores.reshape(scores.shape[0], 1)],
                         dim=1).cpu().numpy()

    # Keep only detections above the confidence threshold, together with
    # their embeddings.
    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]

    if len(dets) > 0:
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    # Split existing tracks into not-yet-activated and activated ones.
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    # Step 2: first association, with embedding fused with motion.
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with the Kalman filter.
    STrack.multi_predict(strack_pool)
    dists = matching.embedding_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                 detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.4)
    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    # Step 3: second association, with IOU.
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.5)
    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    # Deal with unconfirmed tracks, usually tracks with only one beginning
    # frame.
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    # Step 4: init new tracks from confident unmatched detections.
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    # Step 5: update state.
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    # Keep only the tracks whose state is still Tracked ...
    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    # ... then add the tracks updated/activated and re-found this frame.
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         refind_stracks)
    # Lost list: drop everything now tracked, add the newly lost tracks,
    # then drop tracks already recorded as removed.
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    # Resolve tracked/lost tracks that overlap each other.
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format(
        [track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format(
        [track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format(
        [track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format(
        [track.track_id for track in removed_stracks]))

    return output_stracks
def update(self, dets):
    """Advance the tracker by one frame from a raw detection array.

    Args:
        dets: 2-D array of detections. The last column is compared with
            ``self.opt.det_th``; columns 0-3 and column 4 of each row are
            passed to ``STrack`` as box and score.

    Returns:
        The activated tracks held in ``self.tracked_stracks`` after this
        frame.
    """
    self.frame_id += 1
    activated_starcks = []

    # Threshold on the last column, then non-maximum suppression.
    remain_inds = dets[:, -1] > self.opt.det_th
    dets = dets[remain_inds]
    keep = self.nms(dets, 0.7)
    dets = dets[keep]

    if len(dets) > 0:
        detections = [STrack(tlbrs[:4], tlbrs[4]) for tlbrs in dets[:, :5]]
    else:
        detections = []

    # Priority matching is only used with more than 10 tracks and when the
    # ``tracklet_score`` option is on.
    prio_match = False
    if len(self.tracked_stracks) > 10 and self.opt.tracklet_score:
        prio_match = True
        max_len = max(tra.tracklet_len for tra in self.tracked_stracks) + 1

    tracked_stracks = []  # type: list[STrack]
    scores = []
    for track in self.tracked_stracks:
        tracked_stracks.append(track)
        if prio_match:
            # Tracklet scoring function: relative length plus track score.
            scores.append(track.tracklet_len / max_len + track.score)

    strack_pool = tracked_stracks
    if prio_match:
        # Match the better-scoring half of the tracks first, by IOU.
        order = np.array(scores).argsort()[::-1]
        pri_strack_pool = [
            tracked_stracks[i] for i in order[:order.size // 2]
        ]
        dists = matching.iou_distance(pri_strack_pool, detections)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=0.5)
        for itracked, idet in matches:
            track = pri_strack_pool[itracked]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_starcks.append(track)
        # The remaining pool is the other half plus the unmatched priority
        # tracks that are still in Tracked state.
        strack_pool = [
            tracked_stracks[i] for i in order[order.size // 2:]
        ] + [
            pri_strack_pool[i] for i in u_track
            if pri_strack_pool[i].state == TrackState.Tracked
        ]
        detections = [detections[i] for i in u_detection]

    # Step 2: first association, with the Kalman filter gate only.
    STrack.multi_predict(strack_pool)
    # Builtin ``float`` replaces ``np.float``, which no longer exists in
    # NumPy >= 1.24.
    dists = np.zeros((len(strack_pool), len(detections)), dtype=float)
    dists = matching.gate_cost_matrix(self.kalman_filter, dists,
                                      strack_pool, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.7)
    for itracked, idet in matches:
        track = strack_pool[itracked]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)

    # Step 3: second association, with IOU.
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.5)
    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)

    # Unmatched tracks are removed right away; this variant keeps no lost
    # list.
    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_removed()

    # Step 4: init new tracks from confident unmatched detections.
    for inew in u_detection:
        track = detections[inew]
        if track.score < 0.85:  # self.det_thresh
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         activated_starcks)
    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]
    return output_stracks
def update(self, im_blob, img0):
    """Process the detections of the current frame and update all tracks.

    Args:
        im_blob: network input; ``shape[2]`` and ``shape[3]`` are read as
            the input height and width.
        img0: original frame; ``shape[0]`` and ``shape[1]`` are read as its
            height and width.

    Returns:
        The activated tracks held in ``self.tracked_stracks`` after this
        frame.
    """
    self.frame_id += 1
    activated = []      # tracks updated or newly started in this frame
    refound = []        # non-Tracked pool tracks matched again in this frame
    newly_lost = []     # tracks that lost their match in this frame
    newly_removed = []  # tracks to be dropped in this frame

    height, width = img0.shape[0], img0.shape[1]
    inp_height, inp_width = im_blob.shape[2], im_blob.shape[3]
    meta = {
        'c': np.array([width / 2., height / 2.], dtype=np.float32),
        's': max(float(inp_width) / float(inp_height) * height, width) * 1.0,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio,
    }

    # Step 1: network forward, get detections & embeddings.
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = F.normalize(output['id'], dim=1)
        reg = output['reg'] if self.opt.reg_offset else None
        # ``inds`` are the indices of the decoded detections; they are used
        # to gather the matching embedding vectors.
        dets, inds = mot_decode(hm, wh, reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh,
                                K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0).cpu().numpy()

    # Post-process with the image metadata, then merge the outputs.
    dets = self.merge_outputs([self.post_process(dets, meta)])[1]

    # Keep only detections whose confidence exceeds the threshold, together
    # with their embeddings.
    confident = dets[:, 4] > self.opt.conf_thres
    dets = dets[confident]
    id_feature = id_feature[confident]

    # Wrap every surviving detection in an STrack.
    detections = []
    if len(dets) > 0:
        for tlbrs, feat in zip(dets[:, :5], id_feature):
            detections.append(
                STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], feat, 30))

    # Split the tracks that existed before this frame into unconfirmed
    # (not yet activated) and confirmed ones.
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for existing in self.tracked_stracks:
        if existing.is_activated:
            tracked_stracks.append(existing)
        else:
            unconfirmed.append(existing)

    # Step 2: first association, with embedding.
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location of every pooled track with the Kalman
    # filter.
    STrack.multi_predict(strack_pool)
    # Embedding distance between tracks and detections, fused with motion.
    dists = matching.embedding_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                 detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.7)
    for trk_idx, det_idx in matches:
        track = strack_pool[trk_idx]
        det = detections[det_idx]
        if track.state != TrackState.Tracked:
            # Track from the pool that was not in Tracked state: re-activate
            # it under its old id.
            track.re_activate(det, self.frame_id, new_id=False)
            refound.append(track)
        else:
            track.update(det, self.frame_id)
            activated.append(track)

    # Step 3: second association, with IOU, between the still-unmatched
    # detections and the still-unmatched Tracked tracks.
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.5)
    for trk_idx, det_idx in matches:
        track = r_tracked_stracks[trk_idx]
        det = detections[det_idx]
        if track.state != TrackState.Tracked:
            track.re_activate(det, self.frame_id, new_id=False)
            refound.append(track)
        else:
            track.update(det, self.frame_id)
            activated.append(track)

    # Whatever is still unmatched becomes lost (unless it already is).
    for trk_idx in u_track:
        track = r_tracked_stracks[trk_idx]
        if track.state != TrackState.Lost:
            track.mark_lost()
            newly_lost.append(track)

    # Third association: deal with unconfirmed tracks, usually tracks with
    # only one beginning frame.
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for trk_idx, det_idx in matches:
        track = unconfirmed[trk_idx]
        track.update(detections[det_idx], self.frame_id)
        activated.append(track)
    for trk_idx in u_unconfirmed:
        track = unconfirmed[trk_idx]
        track.mark_removed()
        newly_removed.append(track)

    # Step 4: init new tracks; detections below the tracking threshold are
    # skipped.
    for det_idx in u_detection:
        candidate = detections[det_idx]
        if candidate.score >= self.det_thresh:
            candidate.activate(self.kalman_filter, self.frame_id)
            activated.append(candidate)

    # Step 5: update state -- drop tracks that have been lost for too long.
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame <= self.max_time_lost:
            continue
        track.mark_removed()
        newly_removed.append(track)

    still_tracked = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    still_tracked = joint_stracks(still_tracked, activated)
    self.tracked_stracks = joint_stracks(still_tracked, refound)

    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(newly_lost)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(newly_removed)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    # Only activated tracks are reported to the caller.
    output_stracks = [t for t in self.tracked_stracks if t.is_activated]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([t.track_id for t in activated]))
    logger.debug('Refind: {}'.format([t.track_id for t in refound]))
    logger.debug('Lost: {}'.format([t.track_id for t in newly_lost]))
    logger.debug('Removed: {}'.format([t.track_id for t in newly_removed]))

    return output_stracks
def update(self, im_blob, img0):
    """Advance the tracker by one frame and return the active tracks.

    Runs the network on ``im_blob``, decodes detections together with their
    identity embeddings, then associates them with existing tracks in three
    passes: embedding distance fused with motion, IoU against the tracks
    that are still in the ``Tracked`` state, and IoU against unconfirmed
    tracks. Left-over detections whose score is not below
    ``self.det_thresh`` start new tracks. Finally ``self.tracked_stracks``,
    ``self.lost_stracks`` and ``self.removed_stracks`` are refreshed.

    Args:
        im_blob: Network input; ``shape[2]`` and ``shape[3]`` are read as
            the input height and width (presumably NCHW - confirm).
        img0: Original frame; ``shape[0]`` and ``shape[1]`` are read as its
            height and width.

    Returns:
        The entries of ``self.tracked_stracks`` whose ``is_activated`` flag
        is set after this frame's update.
    """
    self.frame_id += 1
    activated = []      # tracks updated or started in this frame
    refound = []        # non-tracked pool entries matched again
    newly_lost = []     # tracked entries left without a detection
    newly_removed = []  # tracks dropped in this frame

    height, width = img0.shape[0], img0.shape[1]
    inp_height, inp_width = im_blob.shape[2], im_blob.shape[3]
    meta = {
        "c": np.array([width / 2.0, height / 2.0], dtype=np.float32),
        "s": max(float(inp_width) / float(inp_height) * height, width) * 1.0,
        "out_height": inp_height // self.opt.down_ratio,
        "out_width": inp_width // self.opt.down_ratio,
    }

    # Step 1: network forward pass -> detections and identity embeddings.
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        heatmap = output["hm"].sigmoid_()
        size_map = output["wh"]
        embeddings = F.normalize(output["id"], dim=1)
        offsets = output["reg"] if self.opt.reg_offset else None
        dets, inds = mot_decode(
            heatmap,
            size_map,
            reg=offsets,
            cat_spec_wh=self.opt.cat_spec_wh,
            K=self.opt.K,
        )
        embeddings = _tranpose_and_gather_feat(embeddings, inds)
        embeddings = embeddings.squeeze(0).cpu().numpy()

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    # Keep only detections whose column-4 value exceeds the confidence
    # threshold, and the embeddings that belong to them.
    confident = dets[:, 4] > self.opt.conf_thres
    dets = dets[confident]
    embeddings = embeddings[confident]

    if len(dets) > 0:
        detections = [
            STrack(STrack.tlbr_to_tlwh(row[:4]), row[4], feat, 30)
            for row, feat in zip(dets[:, :5], embeddings)
        ]
    else:
        detections = []

    # Split the current tracks by their activation flag.
    unconfirmed = [t for t in self.tracked_stracks if not t.is_activated]
    confirmed = [t for t in self.tracked_stracks if t.is_activated]

    # Step 2: first association, embedding distance fused with motion.
    strack_pool = joint_stracks(confirmed, self.lost_stracks)
    STrack.multi_predict(strack_pool)
    cost = matching.embedding_distance(strack_pool, detections)
    cost = matching.fuse_motion(self.kalman_filter, cost, strack_pool,
                                detections)
    matches, u_track, u_detection = matching.linear_assignment(cost,
                                                               thresh=0.7)
    for pool_idx, det_idx in matches:
        track = strack_pool[pool_idx]
        det = detections[det_idx]
        if track.state != TrackState.Tracked:
            track.re_activate(det, self.frame_id, new_id=False)
            refound.append(track)
            continue
        track.update(det, self.frame_id)
        activated.append(track)

    # Step 3: second association, IoU only, restricted to the unmatched
    # pool entries that are still in the Tracked state.
    detections = [detections[i] for i in u_detection]
    still_tracked = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    cost = matching.iou_distance(still_tracked, detections)
    matches, u_track, u_detection = matching.linear_assignment(cost,
                                                               thresh=0.5)
    for track_idx, det_idx in matches:
        track = still_tracked[track_idx]
        det = detections[det_idx]
        if track.state != TrackState.Tracked:
            track.re_activate(det, self.frame_id, new_id=False)
            refound.append(track)
            continue
        track.update(det, self.frame_id)
        activated.append(track)

    for track_idx in u_track:
        track = still_tracked[track_idx]
        if track.state != TrackState.Lost:
            track.mark_lost()
            newly_lost.append(track)

    # Unconfirmed tracks: matched ones are updated, the rest are removed.
    detections = [detections[i] for i in u_detection]
    cost = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        cost, thresh=0.7)
    for track_idx, det_idx in matches:
        track = unconfirmed[track_idx]
        track.update(detections[det_idx], self.frame_id)
        activated.append(track)
    for track_idx in u_unconfirmed:
        track = unconfirmed[track_idx]
        track.mark_removed()
        newly_removed.append(track)

    # Step 4: start new tracks from the detections nobody claimed.
    for det_idx in u_detection:
        candidate = detections[det_idx]
        if candidate.score < self.det_thresh:
            continue
        candidate.activate(self.kalman_filter, self.frame_id)
        activated.append(candidate)

    # Step 5: retire lost tracks that exceeded max_time_lost, then rebuild
    # the tracker's lists.
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            newly_removed.append(track)

    kept = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
    kept = joint_stracks(kept, activated)
    self.tracked_stracks = joint_stracks(kept, refound)

    lost_pool = sub_stracks(self.lost_stracks, self.tracked_stracks)
    lost_pool.extend(newly_lost)
    # removed_stracks is consulted before this frame's removals are added.
    self.lost_stracks = sub_stracks(lost_pool, self.removed_stracks)
    self.removed_stracks.extend(newly_removed)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    output_stracks = [t for t in self.tracked_stracks if t.is_activated]

    logger.debug("===========Frame {}==========".format(self.frame_id))
    for template, group in (
        ("Activated: {}", activated),
        ("Refind: {}", refound),
        ("Lost: {}", newly_lost),
        ("Removed: {}", newly_removed),
    ):
        logger.debug(template.format([t.track_id for t in group]))

    return output_stracks
def workOnDetections(opt, pred, results, img0, frame_id, save_dir,
                     show_image, self_dict):
    """Run one tracking step on raw predictions, with state held in a dict.

    The tracker state lives in ``self_dict`` instead of on an object. The
    function post-processes ``pred`` into detections, associates them with
    the stored tracks, updates the stored track lists, appends the filtered
    boxes of the active tracks to ``results`` and optionally shows or
    saves a visualisation.

    Args:
        opt: Options object; ``conf_thres``, ``nms_thres``, ``img_size`` and
            ``min_box_area`` are read.
        pred: Predictions for one frame; ``pred.unsqueeze(0)`` is handed to
            ``non_max_suppression`` when ``len(pred) > 0``.
        results: List that receives one
            ``(frame_id['0'] + 1, tlwhs, track_ids)`` tuple per call.
        img0: Original frame; its ``shape`` is passed to ``scale_coords``
            and the frame is passed to ``vis.plot_tracking``.
        frame_id: Dict whose ``'0'`` entry is the output frame counter; it
            is incremented by one before returning.
        save_dir: Directory for ``{:05d}.jpg`` visualisations, or ``None``.
        show_image: When truthy, display the visualisation with
            ``cv2.imshow``.
        self_dict: Tracker state with the keys ``'frame_id'``,
            ``'tracked_stracks'``, ``'lost_stracks'``, ``'removed_stracks'``,
            ``'kalman_filter'``, ``'det_thresh'`` and ``'max_time_lost'``.

    Returns:
        The tuple ``(self_dict, frame_id)``; both are updated in place.
    """
    self_dict['frame_id'] += 1
    # Read once: nothing below writes self_dict['frame_id'] again.
    frame_no = self_dict['frame_id']

    activated_stracks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    if len(pred) > 0:
        dets = non_max_suppression(pred.unsqueeze(0), opt.conf_thres,
                                   opt.nms_thres)[0]
        # NOTE(review): the value returned by .round() is discarded, so this
        # line only has an effect if scale_coords rescales dets[:, :4] in
        # place - confirm against scale_coords.
        scale_coords(opt.img_size, dets[:, :4], img0.shape).round()
        dets, embs = dets[:, :5].cpu().numpy(), dets[:, 6:].cpu().numpy()
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets, embs)
        ]
    else:
        detections = []

    # Split the stored tracks by their activation flag.
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self_dict['tracked_stracks']:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    # Step 2: first association, embedding distance fused with motion.
    strack_pool = joint_stracks(tracked_stracks, self_dict['lost_stracks'])
    STrack.multi_predict(strack_pool)
    dists = matching.embedding_distance(strack_pool, detections)
    dists = matching.fuse_motion(self_dict['kalman_filter'], dists,
                                 strack_pool, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.7)
    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, frame_no)
            activated_stracks.append(track)
        else:
            track.re_activate(det, frame_no, new_id=False)
            refind_stracks.append(track)

    # Step 3: second association, IoU only, on unmatched Tracked entries.
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.5)
    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, frame_no)
            activated_stracks.append(track)
        else:
            track.re_activate(det, frame_no, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    # Unconfirmed tracks: matched ones are updated, the rest are removed.
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], frame_no)
        activated_stracks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    # Step 4: start new tracks from the detections nobody claimed.
    for inew in u_detection:
        track = detections[inew]
        if track.score < self_dict['det_thresh']:
            continue
        track.activate(self_dict['kalman_filter'], frame_no)
        activated_stracks.append(track)

    # Step 5: retire lost tracks that exceeded max_time_lost, then rebuild
    # the stored lists.
    for track in self_dict['lost_stracks']:
        if frame_no - track.end_frame > self_dict['max_time_lost']:
            track.mark_removed()
            removed_stracks.append(track)

    self_dict['tracked_stracks'] = [
        t for t in self_dict['tracked_stracks']
        if t.state == TrackState.Tracked
    ]
    self_dict['tracked_stracks'] = joint_stracks(
        self_dict['tracked_stracks'], activated_stracks)
    self_dict['tracked_stracks'] = joint_stracks(
        self_dict['tracked_stracks'], refind_stracks)
    self_dict['lost_stracks'] = sub_stracks(self_dict['lost_stracks'],
                                            self_dict['tracked_stracks'])
    self_dict['lost_stracks'].extend(lost_stracks)
    # removed_stracks is consulted before this frame's removals are added.
    self_dict['lost_stracks'] = sub_stracks(self_dict['lost_stracks'],
                                            self_dict['removed_stracks'])
    self_dict['removed_stracks'].extend(removed_stracks)
    self_dict['tracked_stracks'], self_dict['lost_stracks'] = \
        remove_duplicate_stracks(self_dict['tracked_stracks'],
                                 self_dict['lost_stracks'])

    output_stracks = [
        track for track in self_dict['tracked_stracks']
        if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(frame_no))
    logger.debug('Activated: {}'.format(
        [track.track_id for track in activated_stracks]))
    logger.debug('Refind: {}'.format(
        [track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format(
        [track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format(
        [track.track_id for track in removed_stracks]))

    # Keep boxes whose tlwh[2] * tlwh[3] exceeds opt.min_box_area and whose
    # tlwh[2] / tlwh[3] ratio is not above 1.6.
    online_tlwhs = []
    online_ids = []
    for t in output_stracks:
        tlwh = t.tlwh
        tid = t.track_id
        vertical = tlwh[2] / tlwh[3] > 1.6
        if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
            online_tlwhs.append(tlwh)
            online_ids.append(tid)
    results.append((frame_id['0'] + 1, online_tlwhs, online_ids))

    if show_image or save_dir is not None:
        online_im = vis.plot_tracking(img0, online_tlwhs, online_ids,
                                      frame_id=frame_id['0'])
    if show_image:
        cv2.imshow('online_im', online_im)
    if save_dir is not None:
        cv2.imwrite(
            os.path.join(save_dir, '{:05d}.jpg'.format(frame_id['0'])),
            online_im)

    frame_id['0'] += 1
    return self_dict, frame_id
def update(self, im_blob, img0):
    """Advance the tracker by one frame using an external detector.

    Boxes come from ``self.detector``; only those with class value 0 are
    kept. Their identity embeddings are obtained by applying
    ``self.roi_align`` to the normalised ``'id'`` output of ``self.model``
    and averaging over the last two axes. The detections are then
    associated with existing tracks in three passes (embedding distance
    fused with motion, IoU on the remaining ``Tracked`` tracks, IoU on
    unconfirmed tracks), left-over detections whose score is not below
    ``self.det_thresh`` start new tracks, and the tracker's lists are
    refreshed.

    Args:
        im_blob: Network input; ``shape[2]`` and ``shape[3]`` are read as
            the input height and width (presumably NCHW - confirm).
        img0: Original frame; ``shape[0]`` and ``shape[1]`` are read as its
            height and width.

    Returns:
        The entries of ``self.tracked_stracks`` whose ``is_activated`` flag
        is set after this frame's update.
    """
    self.frame_id += 1
    activated_stracks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]

    # Step 1: network forward pass, detector boxes and ROI embeddings.
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        bboxes, scores, clses = self.detector(im_blob)

        # Keep class-0 detections only (the original names them "human").
        human_inds = np.where(clses == 0)[0]
        bboxes = bboxes[human_inds]
        scores = np.expand_dims(scores[human_inds], axis=1)
        clses = np.expand_dims(clses[human_inds], axis=1)
        num_boxes = len(bboxes)
        dets = np.concatenate((bboxes, scores, clses), axis=1)

        if num_boxes > 0:
            # The GPU tensors are only needed when there is something to
            # pool, so they are built inside this branch.
            # presumably 1/4 is the feature-map stride of the 'id' head
            # relative to the network input - TODO confirm
            scale = 1.0 / 4
            bboxes_roi_align = torch.from_numpy(bboxes.copy()).cuda()
            bboxes_roi_align *= scale
            box_indexs = torch.from_numpy(np.arange(num_boxes)).to(
                torch.int).cuda()

            id_feature = output['id']
            id_feature = F.normalize(id_feature, dim=1)
            # One copy of the feature map per box, indexed by box_indexs.
            id_feature = id_feature.repeat(num_boxes, 1, 1, 1)
            id_feature = self.roi_align(id_feature, bboxes_roi_align,
                                        box_indexs)
            id_feature = torch.mean(id_feature, [2, 3]).cpu().numpy()
        else:
            # presumably 128 equals the embedding dimension - TODO confirm
            id_feature = np.empty((0, 128), dtype=float)

    # Rescale the box columns by width / inp_width (columns 0 and 2) and
    # height / inp_height (columns 1 and 3).
    dets[:, [0, 2]] = dets[:, [0, 2]] / inp_width * width
    dets[:, [1, 3]] = dets[:, [1, 3]] / inp_height * height

    if len(dets) > 0:
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    # Split the current tracks by their activation flag.
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    # Step 2: first association, embedding distance fused with motion.
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    STrack.multi_predict(strack_pool)
    dists = matching.embedding_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                 detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.4)
    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_stracks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    # Step 3: second association, IoU only, on unmatched Tracked entries.
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.5)
    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_stracks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    # Unconfirmed tracks: matched ones are updated, the rest are removed.
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_stracks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    # Step 4: start new tracks from the detections nobody claimed.
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_stracks.append(track)

    # Step 5: retire lost tracks that exceeded max_time_lost, then rebuild
    # the tracker's lists.
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         activated_stracks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    # removed_stracks is consulted before this frame's removals are added.
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format(
        [track.track_id for track in activated_stracks]))
    logger.debug('Refind: {}'.format(
        [track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format(
        [track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format(
        [track.track_id for track in removed_stracks]))

    return output_stracks
def update(self, im_blob, img0):
    """Advance the tracker by one frame and return the active tracks.

    Runs ``self.model`` on ``im_blob``, filters the predictions by
    ``self.opt.conf_thres``, applies non-maximum suppression and builds one
    ``STrack`` per surviving row. Detections are then associated with
    existing tracks in three passes (embedding distance gated by the Kalman
    filter, IoU on the remaining ``Tracked`` tracks, IoU on unconfirmed
    tracks), left-over detections whose score is not below
    ``self.det_thresh`` start new tracks, and the tracker's lists are
    refreshed.

    Args:
        im_blob: Network input passed straight to ``self.model``.
        img0: Original frame; its ``shape`` is passed to ``scale_coords``.

    Returns:
        The entries of ``self.tracked_stracks`` whose ``is_activated`` flag
        is set after this frame's update.
    """
    self.frame_id += 1
    activated_stracks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    # Step 1: network forward pass, get detections & embeddings.
    with torch.no_grad():
        pred = self.model(im_blob)
    pred = pred[pred[:, :, 4] > self.opt.conf_thres]
    if len(pred) > 0:
        dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres,
                                   self.opt.nms_thres)[0].cpu()
        # NOTE(review): the value returned by .round() is discarded, so this
        # line only has an effect if scale_coords rescales dets[:, :4] in
        # place - confirm against scale_coords.
        scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
        # The last emb_dim columns of each row are used as the embedding.
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f.numpy(), 30)
            for (tlbrs, f) in zip(dets[:, :5],
                                  dets[:, -self.model.emb_dim:])
        ]
    else:
        detections = []

    # Split the current tracks by their activation flag.
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    # Step 2: first association, embedding distance with Kalman gating.
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    for strack in strack_pool:
        strack.predict()

    dists = matching.embedding_distance(strack_pool, detections)
    dists = matching.gate_cost_matrix(self.kalman_filter, dists,
                                      strack_pool, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.7)
    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_stracks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    # Step 3: second association, IoU only, on unmatched Tracked entries.
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.5)
    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_stracks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    # Unconfirmed tracks: matched ones are updated, the rest are removed.
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_stracks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    # Step 4: start new tracks from the detections nobody claimed.
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_stracks.append(track)

    # Step 5: retire lost tracks that exceeded max_time_lost, then rebuild
    # the tracker's lists.
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         activated_stracks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    # removed_stracks is consulted before this frame's removals are added.
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format(
        [track.track_id for track in activated_stracks]))
    logger.debug('Refind: {}'.format(
        [track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format(
        [track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format(
        [track.track_id for track in removed_stracks]))

    return output_stracks
def update(self, dets, features):
    """Associate one frame of precomputed detections with the tracks.

    Detections are matched to existing tracks in three passes: embedding
    distance fused with motion (threshold ``self.thresh1``), IoU against
    the unmatched tracks still in the ``Tracked`` state
    (``self.thresh2``), and IoU against unconfirmed tracks
    (``self.thresh3``). Left-over detections whose score is not below
    ``self.det_thresh`` start new tracks, and ``self.tracked_stracks``,
    ``self.lost_stracks`` and ``self.removed_stracks`` are refreshed.

    Args:
        dets: Detection rows; ``x[:4]`` is passed to
            ``STrack.tlbr_to_tlwh``, ``x[4]`` is used as the score and
            ``x[5]`` as the label.
        features: Appearance features, iterated in lockstep with ``dets``.

    Returns:
        The entries of ``self.tracked_stracks`` whose ``is_activated`` flag
        is set after this frame's update.
    """
    self.frame_id += 1
    activated = []      # tracks updated or started in this frame
    refound = []        # non-tracked pool entries matched again
    newly_lost = []     # tracked entries left without a detection
    newly_removed = []  # tracks dropped in this frame

    detections = []
    if len(dets) > 0:
        detections = [
            STrack(
                STrack.tlbr_to_tlwh(row[:4]),
                score=row[4],
                label=row[5],
                buffer_size=self.buffer_size,
                feat=feat,
            )
            for row, feat in zip(dets, features)
        ]

    # Split the current tracks by their activation flag.
    unconfirmed = [t for t in self.tracked_stracks if not t.is_activated]
    confirmed = [t for t in self.tracked_stracks if t.is_activated]

    # Step 2: first association, embedding distance fused with motion.
    # Activated tracks and lost tracks compete in the same pool.
    strack_pool = joint_stracks(confirmed, self.lost_stracks)
    STrack.multi_predict(strack_pool, self.kalman_filter)
    cost = matching.embedding_distance(strack_pool, detections)
    cost = matching.fuse_motion(self.kalman_filter, cost, strack_pool,
                                detections)
    matches, u_track, u_detection = matching.linear_assignment(
        cost, thresh=self.thresh1)
    # Each match pairs an index into strack_pool with an index into
    # detections.
    for pool_idx, det_idx in matches:
        track, det = strack_pool[pool_idx], detections[det_idx]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refound.append(track)

    # Step 3: second association, IoU only. Only unmatched pool entries
    # that are still in the Tracked state take part.
    detections = [detections[i] for i in u_detection]
    still_tracked = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    cost = matching.iou_distance(still_tracked, detections)
    matches, u_track, u_detection = matching.linear_assignment(
        cost, thresh=self.thresh2)
    for track_idx, det_idx in matches:
        track, det = still_tracked[track_idx], detections[det_idx]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refound.append(track)

    # Tracks that found no detection in either pass are marked lost.
    for track_idx in u_track:
        track = still_tracked[track_idx]
        if track.state != TrackState.Lost:
            track.mark_lost()
            newly_lost.append(track)

    # Unconfirmed tracks: matched ones are updated, the rest are removed.
    detections = [detections[i] for i in u_detection]
    cost = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        cost, thresh=self.thresh3)
    for track_idx, det_idx in matches:
        track = unconfirmed[track_idx]
        track.update(detections[det_idx], self.frame_id)
        activated.append(track)
    for track_idx in u_unconfirmed:
        track = unconfirmed[track_idx]
        track.mark_removed()
        newly_removed.append(track)

    # Step 4: start new tracks from the detections nobody claimed.
    for det_idx in u_detection:
        candidate = detections[det_idx]
        if candidate.score < self.det_thresh:
            continue
        candidate.activate(self.kalman_filter, self.frame_id)
        activated.append(candidate)

    # Step 5: retire lost tracks that exceeded max_time_lost, then rebuild
    # the tracker's lists.
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            newly_removed.append(track)

    kept = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
    kept = joint_stracks(kept, activated)
    self.tracked_stracks = joint_stracks(kept, refound)

    lost_pool = sub_stracks(self.lost_stracks, self.tracked_stracks)
    lost_pool.extend(newly_lost)
    # removed_stracks is consulted before this frame's removals are added.
    self.lost_stracks = sub_stracks(lost_pool, self.removed_stracks)
    self.removed_stracks.extend(newly_removed)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    return [t for t in self.tracked_stracks if t.is_activated]
def update(self, image, tlwhs=None, det_scores=None):
    """Advance the tracker by one frame and return the tracks to report.

    The method runs person detection itself (``self.models["personDetect"]``),
    optionally re-detects low-confidence boxes on super-resolved crops,
    optionally restores blurry person crops in ``image``, and then performs
    the association cascade: ReID distance -> (optional) face distance for
    tracked targets, the same for lost targets, then IoU for the remaining
    tracked and unconfirmed tracks.

    Args:
        image: Frame to process. It is sliced as ``image[y1:y2, x1:x2]`` and,
            when ``recovery_body`` is enabled, modified in place.
        tlwhs: Ignored. The value is overwritten by the boxes produced by the
            internal person detector; kept only for signature compatibility.
        det_scores: Optional per-detection scores, paired positionally (via
            ``zip``) with the internally detected boxes. Defaults to ones.

    Returns:
        list: Activated tracks in ``self.tracked_stracks`` followed by lost
        tracks whose classifier score is above 0.3 and that were last seen
        at most 4 frames ago.
    """
    self.frame_id += 1

    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    # Crops larger than this many pixels are never re-detected or restored.
    max_crop_area = 480000
    # Cost assigned to a (track, detection) pair when either side has no face.
    no_face_cost = 3

    def face_cost_matrix(tracks, track_indices, dets, det_indices, method,
                         threshold):
        """Build the face-embedding cost matrix for the given index subsets."""
        costs = np.zeros((len(track_indices), len(det_indices)), dtype=float)
        for row, track_idx in enumerate(track_indices):
            gallery = tracks[track_idx].faceFeatures
            if len(gallery) == 0:
                costs[row, :] = no_face_cost
                continue
            for col, det_idx in enumerate(det_indices):
                probe = dets[det_idx].curr_faceFeature
                if probe is None:
                    costs[row, col] = no_face_cost
                    continue
                if method == "last":
                    costs[row, col] = np.linalg.norm(gallery[-1] - probe)
                elif method == "min":
                    # Start just above the threshold so that "no feature
                    # closer than the threshold" stays a non-match.
                    closest = threshold + 1e-5
                    for feature in gallery:
                        distance = np.linalg.norm(feature - probe)
                        if closest > distance:
                            closest = distance
                    costs[row, col] = closest
                elif method == "mean":
                    total = 0
                    for feature in gallery:
                        total += np.linalg.norm(feature - probe)
                    costs[row, col] = total / len(gallery)
                # Any other method leaves the cost at 0, as before.
        # Clamp excessive outliers to just above the threshold.
        costs[costs > threshold] = threshold + 1e-5
        return costs

    def accepted_face_pairs(costs, threshold):
        """Solve the assignment and keep only pairs below the threshold."""
        rows, cols = linear_sum_assignment(costs)
        return [(row, col) for row, col in zip(rows, cols)
                if costs[row, col] < threshold]

    # <custom> Detect
    detects_candidate = self.models["personDetect"].Detect(image)
    if len(detects_candidate) != 0:
        confidences = detects_candidate[:, 4]
        detects = detects_candidate[
            confidences >= self.configs["detect_threshold"]]

        # Re-detect: give low-confidence, small boxes a second chance on a
        # super-resolved crop.
        if self.configs.get("redetect"):
            areas = ((detects_candidate[:, 2] - detects_candidate[:, 0]) *
                     (detects_candidate[:, 3] - detects_candidate[:, 1]))
            detects_refind = detects_candidate[
                (confidences < self.configs["detect_threshold"])
                & (confidences > self.configs["redetect_target_min"])
                & (areas < max_crop_area)]
            for detect in detects_refind:
                # Super resolution & deblurring
                frame_redetect = image[int(detect[1]):int(detect[3]),
                                       int(detect[0]):int(detect[2])]
                frame_redetect = self.models[
                    "superResolution"].SuperResolution(frame_redetect)

                redetects_candidate = self.models["personDetect"].Detect(
                    frame_redetect)
                if len(redetects_candidate) == 0:
                    continue
                redetects = redetects_candidate[
                    redetects_candidate[:, 4] >
                    self.configs["redetect_threshold"]]
                if len(redetects) == 0:
                    continue
                # NOTE(review): the divisor 4 presumably mirrors the
                # super-resolution upscale factor - confirm against the model.
                redetects[:, 0:4] = redetects[:, 0:4] / 4
                # Shift crop-relative boxes back into full-frame coordinates.
                redetects[:, 0:4] = redetects[:, 0:4] + [
                    detect[0], detect[1], detect[0], detect[1]
                ]
                detects = np.vstack((detects, redetects))
    else:
        detects = []

    # Restore blurry person crops directly inside ``image``.
    if len(detects) != 0 and self.configs.get("recovery_body"):
        for detect in detects:
            crop_x1, crop_y1, crop_x2, crop_y2 = [
                int(detect[0]),
                int(detect[1]),
                int(detect[2]),
                int(detect[3])
            ]
            crop_w = crop_x2 - crop_x1
            crop_h = crop_y2 - crop_y1
            logger.debug('%s %s', crop_w, crop_h)
            if crop_w * crop_h > max_crop_area:
                continue
            frame_crop = image[crop_y1:crop_y2, crop_x1:crop_x2]
            # Variance of the Laplacian is used as the sharpness measure.
            sharpness = cv2.Laplacian(frame_crop, cv2.CV_64F).var()
            if sharpness < self.configs["recovery_body_threshold"]:
                recovery_method = self.configs["recovery_body_method"]
                if recovery_method == "sr":
                    frame_crop = self.models[
                        "superResolution"].SuperResolution(frame_crop)
                    image[crop_y1:crop_y2, crop_x1:crop_x2] = cv2.resize(
                        frame_crop, (crop_w, crop_h),
                        interpolation=cv2.INTER_AREA)
                    logger.debug("SR Recovery")
                elif recovery_method == "deblur":
                    frame_crop = self.models["deblur"].Deblur(frame_crop)
                    image[crop_y1:crop_y2, crop_x1:crop_x2] = frame_crop
                    logger.debug("Deblur Recovery")
                else:
                    logger.warning("Recovery Parameter Error!")

    # The "recovery_face" option had an empty body here (no effect).

    if len(detects) != 0:
        # xyxy -> xywh
        tlwhs = detects.copy()[:, :4]
        tlwhs[:, 2:4] = tlwhs[:, 2:4] - tlwhs[:, 0:2]
    else:
        tlwhs = []

    # step 1: prediction
    for strack in itertools.chain(self.tracked_stracks, self.lost_stracks):
        strack.predict()

    # step 2: scoring and selection
    if det_scores is None:
        det_scores = np.ones(len(tlwhs), dtype=float)
    detections = [
        STrack(tlwh, score, from_det=True)
        for tlwh, score in zip(tlwhs, det_scores)
    ]

    if self.classifier is None:
        pred_dets = []
    else:
        self.classifier.update(image)

        n_dets = len(tlwhs)
        if self.use_tracking:
            tracks = [
                STrack(t.self_tracking(image),
                       t.tracklet_score(),
                       from_det=False)
                for t in itertools.chain(self.tracked_stracks,
                                         self.lost_stracks) if t.is_activated
            ]
            detections.extend(tracks)
        rois = np.asarray([d.tlbr for d in detections], dtype=np.float32)

        cls_scores = self.classifier.predict(rois)
        # ``np.float`` / ``np.bool`` were removed in NumPy 1.24; the builtins
        # are the exact types those aliases referred to.
        scores = np.asarray([d.score for d in detections], dtype=float)
        scores[0:n_dets] = 1.
        scores = scores * cls_scores
        # nms
        if len(detections) > 0:
            keep = nms_detections(rois, scores.reshape(-1), nms_thresh=0.3)
            mask = np.zeros(len(rois), dtype=bool)
            mask[keep] = True
            keep = np.where(mask & (scores >= self.min_cls_score))[0]
            detections = [detections[i] for i in keep]
            scores = scores[keep]
            for d, score in zip(detections, scores):
                d.score = score
        pred_dets = [d for d in detections if not d.from_det]
        detections = [d for d in detections if d.from_det]

    # set features
    tlbrs = [det.tlbr for det in detections]
    features = extract_reid_features(self.reid_model, image, tlbrs)
    features = features.cpu().numpy()
    for i, det in enumerate(detections):
        det.set_feature(features[i])

    # <custom> set face feature
    detects_face = []
    if (self.configs.get("faceRecognition_1")
            or self.configs.get("faceRecognition_2")):
        detects_face = self.models["faceDetect"].Detect(image)
    for det in detections:
        person_x1, person_y1, person_x2, person_y2 = det.tlbr[:4]
        faces = []
        for detect_face in detects_face:
            face_x1, face_y1, face_x2, face_y2 = detect_face[:4]
            # A face belongs to a person only if it lies strictly inside the
            # person box. (The bottom edge used to be compared against
            # face_x2 instead of face_y2.)
            if (person_x1 < face_x1 and person_y1 < face_y1
                    and person_x2 > face_x2 and person_y2 > face_y2):
                faces.append(detect_face)
        # Only an unambiguous single face is embedded.
        if len(faces) == 1:
            face = faces[0]
            faceFeature = self.models["faceEmbed"].faceEmbedding(
                image[int(face[1]):int(face[3]),
                      int(face[0]):int(face[2])])
            det.set_faceFeature(faceFeature)

    # step 3: association for tracked
    # matching for tracked targets
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    dists = matching.nearest_reid_distance(tracked_stracks,
                                           detections,
                                           metric='euclidean')
    dists = matching.gate_cost_matrix(self.kalman_filter, dists,
                                      tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(
        dists, thresh=self.min_ap_dist)
    for itracked, idet in matches:
        tracked_stracks[itracked].update(detections[idet], self.frame_id,
                                         image)

    # <custom> face matching for tracked targets
    # detections: all detection candidates (may be modified)
    # tracked_stracks: all track candidates (must not be modified)
    if self.configs.get("faceRecognition_1"):
        face_threshold = self.configs["faceRecognition_1_threshold"]
        cost_matrix = face_cost_matrix(
            tracked_stracks, u_track, detections, u_detection,
            self.configs.get("faceRecognition_1_method"), face_threshold)
        pairs = accepted_face_pairs(cost_matrix, face_threshold)
        for row, col in pairs:
            tracked_stracks[u_track[row]].update(
                detections[u_detection[col]], self.frame_id, image)
        matched_tracks = {u_track[row] for row, _ in pairs}
        matched_detections = {u_detection[col] for _, col in pairs}
        u_track = [i for i in u_track if i not in matched_tracks]
        u_detection = [i for i in u_detection if i not in matched_detections]

    # matching for missing targets
    detections = [detections[i] for i in u_detection]
    dists = matching.nearest_reid_distance(self.lost_stracks,
                                           detections,
                                           metric='euclidean')
    dists = matching.gate_cost_matrix(self.kalman_filter, dists,
                                      self.lost_stracks, detections)
    matches, u_lost, u_detection = matching.linear_assignment(
        dists, thresh=self.min_ap_dist)
    for ilost, idet in matches:
        track = self.lost_stracks[ilost]  # type: STrack
        det = detections[idet]
        track.re_activate(det,
                          self.frame_id,
                          image,
                          new_id=not self.use_refind)
        refind_stracks.append(track)

    # <custom> face matching for missing targets
    if self.configs.get("faceRecognition_2"):
        face_threshold = self.configs["faceRecognition_2_threshold"]
        cost_matrix = face_cost_matrix(
            self.lost_stracks, u_lost, detections, u_detection,
            self.configs.get("faceRecognition_2_method"), face_threshold)
        pairs = accepted_face_pairs(cost_matrix, face_threshold)
        for row, col in pairs:
            track = self.lost_stracks[u_lost[row]]  # type: STrack
            det = detections[u_detection[col]]
            track.re_activate(det,
                              self.frame_id,
                              image,
                              new_id=not self.use_refind)
            refind_stracks.append(track)
        matched_tracks = {u_lost[row] for row, _ in pairs}
        matched_detections = {u_detection[col] for _, col in pairs}
        u_lost = [i for i in u_lost if i not in matched_tracks]
        u_detection = [i for i in u_detection if i not in matched_detections]

    # remaining tracked
    # Real detections come first; indices >= len_det are tracker predictions.
    len_det = len(u_detection)
    detections = [detections[i] for i in u_detection] + pred_dets
    r_tracked_stracks = [tracked_stracks[i] for i in u_track]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.7)
    for itracked, idet in matches:
        r_tracked_stracks[itracked].update(detections[idet],
                                           self.frame_id,
                                           image,
                                           update_feature=True)
    for it in u_track:
        track = r_tracked_stracks[it]
        track.mark_lost()
        lost_stracks.append(track)

    # unconfirmed (only real detections are considered from here on)
    detections = [detections[i] for i in u_detection if i < len_det]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet],
                                     self.frame_id,
                                     image,
                                     update_feature=True)
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    # step 4: init new stracks
    for inew in u_detection:
        track = detections[inew]
        if not track.from_det or track.score < 0.6:
            continue
        track.activate(self.kalman_filter, self.frame_id, image)
        activated_starcks.append(track)

    # step 6: update state
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.lost_stracks = [
        t for t in self.lost_stracks if t.state == TrackState.Lost
    ]  # type: list[STrack]

    self.tracked_stracks.extend(activated_starcks)
    self.tracked_stracks.extend(refind_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.removed_stracks.extend(removed_stracks)

    # get scores of lost tracks
    # NOTE(review): step 2 tolerates ``self.classifier is None`` but this call
    # does not; decide what should be reported for lost tracks in that mode.
    rois = np.asarray([t.tlbr for t in self.lost_stracks], dtype=np.float32)
    lost_cls_scores = self.classifier.predict(rois)
    out_lost_stracks = [
        t for i, t in enumerate(self.lost_stracks)
        if lost_cls_scores[i] > 0.3 and self.frame_id - t.end_frame <= 4
    ]
    output_tracked_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    output_stracks = output_tracked_stracks + out_lost_stracks

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format(
        [track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format(
        [track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format(
        [track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format(
        [track.track_id for track in removed_stracks]))

    return output_stracks
def update(self, im_blob, img0):
    """Run the network on one frame and update all track lists.

    Args:
        im_blob: Network input; ``shape[2]`` and ``shape[3]`` are read as the
            input height and width.
        img0: Original frame; ``shape[0]`` and ``shape[1]`` are read as its
            height and width.

    Returns:
        list: The activated tracks of ``self.tracked_stracks`` after this
        frame's association.
    """
    self.frame_id += 1

    activated = []
    refound = []
    just_lost = []
    just_removed = []

    def absorb(track, det):
        # A still-tracked target is updated; anything else is re-activated
        # under its existing id.
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refound.append(track)

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    center = np.array([width / 2., height / 2.], dtype=np.float32)
    scale = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': center,
        's': scale,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio,
    }

    # Step 1: network forward, get detections & embeddings
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = F.normalize(output['id'], dim=1)

        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg, ltrb=self.opt.ltrb,
                                K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    # Keep only detections above the confidence threshold, together with
    # their embeddings.
    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]

    if len(dets) > 0:
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    # Split existing tracks into confirmed (activated) and unconfirmed ones.
    unconfirmed = [t for t in self.tracked_stracks if not t.is_activated]
    tracked_stracks = [t for t in self.tracked_stracks if t.is_activated]

    # Step 2: first association, with embedding
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location of every pooled track with the KF.
    STrack.multi_predict(strack_pool)
    dists = matching.embedding_distance(strack_pool, detections)
    # Fuse the Kalman-filter motion cue into the appearance cost matrix.
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                 detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.4)
    for itracked, idet in matches:
        absorb(strack_pool[itracked], detections[idet])

    # Step 3: second association, with IOU
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(
        dists, thresh=0.6)  # Default 0.5
    for itracked, idet in matches:
        absorb(r_tracked_stracks[itracked], detections[idet])

    # Tracked targets that matched nothing in either pass become lost.
    for it in u_track:
        track = r_tracked_stracks[it]
        if track.state == TrackState.Lost:
            continue
        track.mark_lost()
        just_lost.append(track)

    # Unconfirmed tracks (usually a single starting frame) get one chance to
    # pair with the detections that are still free.
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for itracked, idet in matches:
        candidate = unconfirmed[itracked]
        candidate.update(detections[idet], self.frame_id)
        activated.append(candidate)
    for it in u_unconfirmed:
        candidate = unconfirmed[it]
        candidate.mark_removed()
        just_removed.append(candidate)

    # Step 4: init new stracks
    for inew in u_detection:
        newcomer = detections[inew]
        if newcomer.score < self.det_thresh:
            continue
        newcomer.activate(self.kalman_filter, self.frame_id)
        activated.append(newcomer)

    # Step 5: update state
    for track in self.lost_stracks:
        # Drop tracks that have been missing for longer than max_time_lost.
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            just_removed.append(track)

    still_tracked = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    still_tracked = joint_stracks(still_tracked, activated)
    self.tracked_stracks = joint_stracks(still_tracked, refound)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(just_lost)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(just_removed)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    for template, group in (('Activated: {}', activated),
                            ('Refind: {}', refound),
                            ('Lost: {}', just_lost),
                            ('Removed: {}', just_removed)):
        logger.debug(template.format([track.track_id for track in group]))

    return output_stracks
def update(self, im_blob, img0, output):
    """Decode precomputed network outputs for one frame and update tracks.

    Args:
        im_blob: Network input; ``shape[2]`` and ``shape[3]`` are read as the
            input height and width.
        img0: Original frame; ``shape[0]`` and ``shape[1]`` are read as its
            height and width.
        output: Mapping of network heads; the keys ``'hm'``, ``'wh'``,
            ``'rois'``, ``'id'``, ``'rpn_map'`` and (when
            ``self.opt.reg_offset`` is set) ``'reg'`` are read.

    Returns:
        list: The activated tracks of ``self.tracked_stracks`` after this
        frame's association.
    """
    time_clean()
    time_sync('prepare')
    self.frame_id += 1

    activated = []
    refound = []
    just_lost = []
    just_removed = []

    def absorb(track, det):
        # A still-tracked target is updated; anything else is re-activated
        # under its existing id.
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refound.append(track)

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    center = np.array([width / 2., height / 2.], dtype=np.float32)
    scale = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': center,
        's': scale,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio,
    }

    # Step 1: decode detections & embeddings from the supplied outputs
    time_sync('prepare')
    with torch.no_grad():
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        rois = output['rois']
        id_feature = F.normalize(output['id'], dim=1)

        reg = output['reg'] if self.opt.reg_offset else None
        time_sync('decode')
        dets, inds = mot_decode2(hm,
                                 rois,
                                 wh,
                                 output['rpn_map'].shape[-2:],
                                 reg=reg,
                                 ltrb=self.opt.ltrb,
                                 K=self.opt.K,
                                 thr=self.opt.nms_thres)
        time_sync('decode')
        id_feature = id_feature[inds]
        id_feature = id_feature.cpu().numpy()

    time_sync('post')
    dets = dets.unsqueeze(0)
    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]
    # The unfiltered detections of every frame are kept in self.raw_dets.
    self.raw_dets.append(dets)

    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]
    time_sync('post')

    if len(dets) > 0:
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    # Split existing tracks into confirmed (activated) and unconfirmed ones.
    unconfirmed = [t for t in self.tracked_stracks if not t.is_activated]
    tracked_stracks = [t for t in self.tracked_stracks if t.is_activated]

    # Step 2: first association, with embedding
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location of every pooled track with the KF.
    STrack.multi_predict(strack_pool)
    dists = matching.embedding_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                 detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.4)
    for itracked, idet in matches:
        absorb(strack_pool[itracked], detections[idet])

    # Step 3: second association, with IOU
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.5)
    for itracked, idet in matches:
        absorb(r_tracked_stracks[itracked], detections[idet])

    # Tracked targets that matched nothing in either pass become lost.
    for it in u_track:
        track = r_tracked_stracks[it]
        if track.state == TrackState.Lost:
            continue
        track.mark_lost()
        just_lost.append(track)

    # Unconfirmed tracks (usually a single starting frame) get one chance to
    # pair with the detections that are still free.
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for itracked, idet in matches:
        candidate = unconfirmed[itracked]
        candidate.update(detections[idet], self.frame_id)
        activated.append(candidate)
    for it in u_unconfirmed:
        candidate = unconfirmed[it]
        candidate.mark_removed()
        just_removed.append(candidate)

    # Step 4: init new stracks
    for inew in u_detection:
        newcomer = detections[inew]
        if newcomer.score < self.det_thresh:
            continue
        newcomer.activate(self.kalman_filter, self.frame_id)
        activated.append(newcomer)

    # Step 5: update state
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            just_removed.append(track)

    still_tracked = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    still_tracked = joint_stracks(still_tracked, activated)
    self.tracked_stracks = joint_stracks(still_tracked, refound)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(just_lost)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(just_removed)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    for template, group in (('Activated: {}', activated),
                            ('Refind: {}', refound),
                            ('Lost: {}', just_lost),
                            ('Removed: {}', just_removed)):
        logger.debug(template.format([track.track_id for track in group]))

    return output_stracks
def update(self, im_blob, img0): self.frame_id += 1 activated_starcks = [] refind_stracks = [] lost_stracks = [] removed_stracks = [] width = img0.shape[1] height = img0.shape[0] init_polygon=self.polygon2 if self.two_polygon_system and self.frame_id>= self.warmup_frame else self.polygon two_wheel_polygon=self.polygon four_wheel_polygon=self.polygon virtual_polygon=self.virtual_polygon huge_box_thres=150 bbox=[] score=[] types=[] huge_vehicles=[] ''' Step 1: Network forward, get detections & embeddings''' if self.opt.detection_model=='Efficient': with torch.no_grad(): ori_imgs, framed_imgs, framed_metas = preprocess([img0], max_size=self.input_size) device = torch.device('cuda:0') x = torch.stack([torch.from_numpy(fi).to(device) for fi in framed_imgs], 0) x = x.to(torch.float32 ).permute(0, 3, 1, 2) features, regression, classification, anchors = self.detetection_model(x) regressBoxes = BBoxTransform() clipBoxes = ClipBoxes() out = postprocess(x, anchors, regression, classification, regressBoxes, clipBoxes, self.opt.det_thres, self.opt.nms_thres) out = invert_affine(framed_metas, out) bbox=[] score=[] types=[] huge_vehicles=[] for j in range(len(out[0]['rois'])): obj = self.obj_list[out[0]['class_ids'][j]] if obj in self.obj_interest: x1, y1, x2, y2 = out[0]['rois'][j].astype(np.int) #bike,bicycle if (y1+y2)/2>height/2 and float(out[0]['scores'][j])<=0.25: continue if (y2-y1)*(x2-x1)<=200: continue if obj not in self.person_or_motorcycle and float(out[0]['scores'][j])>=0.3: bbox.append([x1, y1, x2, y2]) score.append( float(out[0]['scores'][j])) types.append(obj) huge_vehicles.append(False if ((y2-y1)<=huge_box_thres and (x2-x1)<=180) else True ) elif obj in self.person_or_motorcycle: #['bicycle', 'motorcycle'] bbox.append([x1, y1, x2, y2]) score.append( float(out[0]['scores'][j])) types.append(obj) huge_vehicles.append(False) elif self.opt.detection_model=='FasterRcnn': predictions= self.detetection_model.compute_prediction(img0) 
top_predictions=self.detetection_model.select_top_predictions(predictions) scores = top_predictions.get_field("scores").tolist() labels = top_predictions.get_field("labels").tolist() labels = [self.detetection_model.CATEGORIES[i] for i in labels] boxes = top_predictions.bbox.tolist() for j in range(len(labels)): obj = labels[j] if obj in self.obj_interest: x1, y1, x2, y2 = boxes[j] if (y1+y2)/2>0.5*height and float(scores[j])<=0.25: continue if (y2-y1)*(x2-x1)<=200: continue if obj not in self.person_or_motorcycle and float(scores[j])>=0.3: bbox.append([x1, y1, x2, y2]) score.append( float(scores[j])) types.append(obj) huge_vehicles.append(False if ((y2-y1)<=huge_box_thres and (x2-x1)<=180) else True ) elif obj in self.person_or_motorcycle and float(scores[j])>=self.opt.det_thres: #['bicycle', 'motorcycle'] bbox.append([x1, y1, x2, y2]) score.append( float(scores[j])) types.append(obj) huge_vehicles.append(False) # vis # print(len(bbox)) # print(img0.shape) # print(self.polygon) # for i in range(len(bbox)): # bb = bbox[i] # cv2.rectangle(img0, (bb[0], bb[1]), # (bb[2], bb[3]), # (0, 255, 0), 2) # cv2.polylines(img0,[np.asarray(self.polygon)],True,(0,255,255)) # cv2.imshow('dets', img0) # cv2.waitKey(0) if len(bbox) > 0: '''Detections''' detections = [STrack(STrack.tlbr_to_tlwh(tlbr), sco, clas, 30,huge_vehicle=hv) for (tlbr, sco,clas,hv) in zip(bbox,score,types,huge_vehicles)] else: detections = [] detections_plot=copy.deepcopy(detections) ''' Add newly detected tracklets to tracked_stracks''' unconfirmed = [] tracked_stracks = [] # type: list[STrack] for track in self.tracked_stracks: if not track.is_activated: unconfirmed.append(track) else: tracked_stracks.append(track) ''' Step 2: First association, with gating distance''' strack_pool,lost_map_tracks = joint_stracks(tracked_stracks, self.lost_stracks) # Predict the current location with KF #for strack in strack_pool: #strack.predict() STrack.multi_predict(strack_pool) #dists = 
matching.embedding_distance(strack_pool, detections) detections=heuristic_occlusion_detection(detections) match_thres=100 dists=np.zeros(shape=(len(strack_pool),len(detections))) dists = matching.gate_cost_matrix3(self.kalman_filter, dists, strack_pool, detections,type_diff=True) #dists = matching.fuse_motion(self.opt,self.kalman_filter, dists, strack_pool, detections,lost_map=lost_map_tracks,occlusion_map=occlusion_map,thres=match_thres) matches, u_track, u_detection = matching.linear_assignment(dists, thresh=match_thres) for itracked, idet in matches: track = strack_pool[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(detections[idet], self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) ''' Step 3: Second association, with IOU''' detections = [detections[i] for i in u_detection] r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] dists = matching.iou_distance(r_tracked_stracks, detections) matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5) for itracked, idet in matches: track = r_tracked_stracks[itracked] det = detections[idet] if track.state == TrackState.Tracked: track.update(det, self.frame_id) activated_starcks.append(track) else: track.re_activate(det, self.frame_id, new_id=False) refind_stracks.append(track) ''' ''' for it in u_track: track = r_tracked_stracks[it] if not track.state == TrackState.Lost: track.mark_lost() lost_stracks.append(track) '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' detections = [detections[i] for i in u_detection] dists = matching.iou_distance(unconfirmed, detections) matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.6) for itracked, idet in matches: unconfirmed[itracked].update(detections[idet], self.frame_id) activated_starcks.append(unconfirmed[itracked]) for it in 
u_unconfirmed: track = unconfirmed[it] track.mark_removed() removed_stracks.append(track) """ Step 4: Init new stracks""" for inew in u_detection: track = detections[inew] track_init_polygon=init_polygon if not track.huge_vehicle else virtual_polygon if track.score < self.det_thresh or track.occlusion_status==True or check_bbox_outside_polygon(track_init_polygon,track.tlbr): continue # track_types=self.person_or_motorcycle[0] if tlbrs_to_mean_area(track.track_trajectory) <=1500 else track.infer_type() if self.frame_id>=1 and not check_bbox_inside_polygon(track_init_polygon,track.tlbr):#and track_types in self.person_or_motorcycle #person, motorcycle continue track.activate(self.kalman_filter, self.frame_id) activated_starcks.append(track) """ Step 5: Update state and getting out of interest tracklet if have""" out_of_polygon_tracklet=[] refind_stracks_copy=[] activated_starcks_copy=[] for idx,current_tracked_tracks in enumerate([refind_stracks,activated_starcks]) :# for track in current_tracked_tracks: if tlbrs_to_mean_area(track.track_trajectory) <=1000 : track_type= self.person_or_motorcycle[0] #person else: track_type=track.infer_type() if track_type in self.person_or_motorcycle: out_polygon=two_wheel_polygon p_type='two_wheel' else: out_polygon=four_wheel_polygon #if not track.huge_vehicle else virtual_polygon p_type='four_wheel' if check_bbox_outside_polygon(out_polygon,track.tlbr) : track.mark_removed() removed_stracks.append(track) if ((len(track.track_frames)>=4 and self.frame_id <=5) or (len(track.track_frames)>=5 and self.frame_id>=self.warmup_frame+5)) and idx==1:########## 4 is confident number of frame track_center=[ [(x[0]+x[2])/2,(x[1]+x[3])/2] for x in track.track_trajectory] # movement_id=counting_moi(self.paths,[(track_center[0],track_center[-1])])[0] # movement_id=self.heusristic_mov_refinement(track,movement_id) movement_id=self.heusristic_mov_detection(track,track_center) frame_id=self.frame_id 
out_of_polygon_tracklet.append((frame_id,track.track_id,track_type,movement_id)) else: refind_stracks_copy.append(track) if idx ==0 else activated_starcks_copy.append(track) refind_stracks=refind_stracks_copy activated_starcks=activated_starcks_copy lost_stracks_copy=[] for track in lost_stracks: if tlbrs_to_mean_area(track.track_trajectory) <=1000 : track_type= self.person_or_motorcycle[0] #person else: track_type=track.infer_type() if track_type in self.person_or_motorcycle: out_polygon=two_wheel_polygon p_type='two_wheel' else: out_polygon=four_wheel_polygon p_type='four_wheel' if check_bbox_intersect_or_outside_polygon(out_polygon,track.tlbr) : track.mark_removed() removed_stracks.append(track) if ((len(track.track_frames)>=4 and self.frame_id <=5) or (len(track.track_frames)>=6 and self.frame_id>=self.warmup_frame+5)): track_center=[ [(x[0]+x[2])/2,(x[1]+x[3])/2] for x in track.track_trajectory] # movement_id=counting_moi(self.paths,[(track_center[0],track_center[-1])])[0] # movement_id=self.heusristic_mov_refinement(track,movement_id) movement_id=self.heusristic_mov_detection(track,track_center) # line_interest=self.line1 if str(movement_id)=='1' else self.line2 # out_direction='bottom' if str(movement_id)=='1' else 'up' frame_id=self.frame_id if track_type in self.person_or_motorcycle else self.frame_id+5#kalman_predict_out_line(track,line_interest,out_direction) out_of_polygon_tracklet.append((frame_id,track.track_id,track_type,movement_id)) else: lost_stracks_copy.append(track) lost_stracks=lost_stracks_copy for track in self.lost_stracks: if self.frame_id - track.end_frame > self.max_time_lost : track.mark_removed() removed_stracks.append(track) #Remove out of screen tracklet elif track.tlwh[0]+track.tlwh[2]//2>width or track.tlwh[1]+track.tlwh[3]//2>height or min(track.tlwh[0]+track.tlwh[2]//2,track.tlwh[1]+track.tlwh[3]//2)<0: track.num_out_frame+=1 if track.num_out_frame>STrack.out_of_frame_patience: track.mark_removed() removed_stracks.append(track) # 
print('Ramained match {} s'.format(t4-t3)) # print(out_of_polygon_tracklet) self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] self.tracked_stracks,_ = joint_stracks(self.tracked_stracks, activated_starcks) self.tracked_stracks,_ = joint_stracks(self.tracked_stracks, refind_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) self.lost_stracks.extend(lost_stracks) self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) self.removed_stracks.extend(removed_stracks) self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) #self.merge_track() output_stracks = [track for track in self.tracked_stracks if track.is_activated] # logger.debug('===========Frame {}=========='.format(self.frame_id)) # logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks])) # logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks])) # logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks])) # logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks])) return output_stracks,detections_plot,out_of_polygon_tracklet
def update(self, im_blob, img0, p_crops, p_crops_lengths, edge_index,
           gnn_output_layer=-1, p_imgs=None, conf_thres=0.3):
    """Advance the tracker by one frame and return the activated tracks.

    The frame is run through ``self.model``; decoded detections and their
    re-ID embeddings are then associated with existing tracks in three
    passes (embedding distance fused with motion, IoU against the remaining
    tracked tracks, IoU against unconfirmed tracks). Finally the tracker's
    ``tracked_stracks`` / ``lost_stracks`` / ``removed_stracks`` lists are
    updated in place.

    Args:
        im_blob: Network input; ``shape[2]`` and ``shape[3]`` are read as the
            input height and width.
        img0: Original frame; ``shape[0]`` / ``shape[1]`` are read as its
            height / width.
        p_crops, p_crops_lengths, edge_index: Forwarded unchanged to
            ``self.model``.
        gnn_output_layer: Index selecting one entry of the model's output.
        p_imgs: Forwarded to ``self.model`` and to the attention
            visualisation.
        conf_thres: Detections scoring at or below this value are dropped,
            and unmatched detections scoring below it never start a track.

    Returns:
        The tracks in ``self.tracked_stracks`` whose ``is_activated`` flag
        is set.
    """
    self.frame_id += 1

    # Per-frame bookkeeping, merged into the tracker state at the end.
    now_active = []
    now_refound = []
    now_lost = []
    now_removed = []

    height, width = img0.shape[0], img0.shape[1]
    inp_height, inp_width = im_blob.shape[2], im_blob.shape[3]
    center = np.array([width / 2., height / 2.], dtype=np.float32)
    scale = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': center,
        's': scale,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio,
    }

    # Step 1: network forward, get detections & embeddings.
    with torch.no_grad():
        output = self.model(im_blob, p_crops, p_crops_lengths, edge_index,
                            p_imgs=p_imgs)[gnn_output_layer]
        if type(output) is list:
            output = output[-1]
        heatmap = output['hm'].sigmoid_()
        size_map = output['wh']
        embeddings = F.normalize(output['id'], dim=1)
        offsets = output['reg'] if self.opt.reg_offset else None

        dets, inds = mot_decode(heatmap, size_map, reg=offsets,
                                cat_spec_wh=self.opt.cat_spec_wh,
                                K=self.opt.K)
        embeddings = _tranpose_and_gather_feat(embeddings, inds)
        embeddings = embeddings.squeeze(0).cpu().numpy()

        # Optional attention visualisation on one configured frame.
        if self.viz_attention and self.frame_id == self.opt.vis_attn_frame:
            attn = output['p']
            node0_neighbor_idx = output['node0_neighbor_idx']
            keep = torch.where(attn > self.opt.vis_attn_thres)[0]
            self.visualize_centers(im_blob, keep, node0_neighbor_idx, attn,
                                   output, p_imgs)

    dets = self.merge_outputs([self.post_process(dets, meta)])[1]

    # Column 4 holds the detection score; the caller-supplied threshold is
    # used here rather than self.opt.conf_thres.
    keep_mask = dets[:, 4] > conf_thres
    dets = dets[keep_mask]
    embeddings = embeddings[keep_mask]

    detections = [
        STrack(STrack.tlbr_to_tlwh(row[:4]), row[4], feat, 30)
        for row, feat in zip(dets[:, :5], embeddings)
    ] if len(dets) > 0 else []

    # Split the current tracks into not-yet-activated and activated ones.
    unconfirmed = [t for t in self.tracked_stracks if not t.is_activated]
    confirmed = [t for t in self.tracked_stracks if t.is_activated]

    # Step 2: first association, with embedding.
    pool = joint_stracks(confirmed, self.lost_stracks)
    # Predict the current location with the Kalman filter.
    STrack.multi_predict(pool)
    cost = matching.embedding_distance(pool, detections)
    cost = matching.fuse_motion(self.kalman_filter, cost, pool, detections)
    matches, u_track, u_detection = matching.linear_assignment(cost, thresh=0.7)
    for track_idx, det_idx in matches:
        track = pool[track_idx]
        det = detections[det_idx]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            now_active.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            now_refound.append(track)

    # Step 3: second association, with IoU.
    detections = [detections[i] for i in u_detection]
    leftover_tracked = [
        pool[i] for i in u_track if pool[i].state == TrackState.Tracked
    ]
    cost = matching.iou_distance(leftover_tracked, detections)
    matches, u_track, u_detection = matching.linear_assignment(cost, thresh=0.5)
    for track_idx, det_idx in matches:
        track = leftover_tracked[track_idx]
        det = detections[det_idx]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            now_active.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            now_refound.append(track)

    for idx in u_track:
        track = leftover_tracked[idx]
        if track.state != TrackState.Lost:
            track.mark_lost()
            now_lost.append(track)

    # Deal with unconfirmed tracks, usually tracks with only one beginning
    # frame.
    detections = [detections[i] for i in u_detection]
    cost = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(cost, thresh=0.7)
    for track_idx, det_idx in matches:
        track = unconfirmed[track_idx]
        track.update(detections[det_idx], self.frame_id)
        now_active.append(track)
    for idx in u_unconfirmed:
        track = unconfirmed[idx]
        track.mark_removed()
        now_removed.append(track)

    # Step 4: init new stracks from detections nothing claimed.
    for idx in u_detection:
        candidate = detections[idx]
        if candidate.score < conf_thres:
            continue
        candidate.activate(self.kalman_filter, self.frame_id)
        now_active.append(candidate)

    # Step 5: update state. Drop tracks that have been lost for too long.
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            now_removed.append(track)

    tracked = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
    tracked = joint_stracks(tracked, now_active)
    tracked = joint_stracks(tracked, now_refound)
    lost = sub_stracks(self.lost_stracks, tracked)
    lost.extend(now_lost)
    lost = sub_stracks(lost, self.removed_stracks)
    self.removed_stracks.extend(now_removed)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(tracked, lost)

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([t.track_id for t in now_active]))
    logger.debug('Refind: {}'.format([t.track_id for t in now_refound]))
    logger.debug('Lost: {}'.format([t.track_id for t in now_lost]))
    logger.debug('Removed: {}'.format([t.track_id for t in now_removed]))

    return [t for t in self.tracked_stracks if t.is_activated]
def update_sep3(self, im_blob, img0, conf_thres=None):
    """Advance the per-class trackers by one frame.

    The network is run once; detections and re-ID embeddings are split by
    class and each class is then tracked independently against its own
    ``self.tracked_stracks_sp[i_class]`` / ``self.lost_stracks_sp[i_class]``
    / ``self.removed_stracks_sp[i_class]`` lists (indexed by the 1-based
    class index).

    Args:
        im_blob: Network input; ``shape[2]`` and ``shape[3]`` are read as the
            input height and width.
        img0: Original frame; ``shape[0]`` / ``shape[1]`` are read as its
            height / width.
        conf_thres: Optional per-class score thresholds, indexed by
            ``i_class - 1``. When ``None``, ``self.opt.conf_thres`` is used
            for every class.

    Returns:
        A flat list with the activated tracks of every class, in class
        order.
    """
    self.frame_id += 1

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio,
    }

    # Step 1: network forward, get detections & embeddings.
    cls_id_feats = []
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)
        reg = output['reg'] if self.opt.reg_offset else None

        dets, inds, cls_inds_mask = mot_decode_sp(
            hm, wh, reg=reg, cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)

        # Gather one embedding matrix per class (0-based class id here).
        for cls_id in range(self.opt.num_classes):
            cls_inds = inds[:, cls_inds_mask[cls_id].squeeze(0)]
            cls_id_feature = _tranpose_and_gather_feat(id_feature, cls_inds)
            cls_id_feature = cls_id_feature.squeeze(0)
            cls_id_feature = cls_id_feature.cpu().numpy()
            cls_id_feats.append(cls_id_feature)

    dets = self.post_process(dets, meta)
    # The merged result is indexed by 1-based class index below.
    dets_classes = self.merge_outputs([dets])

    output_stracks = []
    # Per-class threshold for the embedding association.
    # NOTE(review): the table has eight entries, so num_classes > 8 would
    # raise IndexError below; confirm against the model configuration.
    emb_thresh = [0.9, 1.7, 0.7, 0.7, 0.7, 0.7, 1.0, 1.7]

    for i_class in range(1, self.opt.num_classes + 1):
        activated_stracks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        dets = dets_classes[i_class]
        if conf_thres is None:
            remain_inds = dets[:, 4] > self.opt.conf_thres
        else:
            remain_inds = dets[:, 4] > conf_thres[i_class - 1]
        dets = dets[remain_inds]
        # NOTE(review): the embeddings are not filtered with remain_inds; the
        # zip() below pairs the k-th kept detection with the k-th embedding.
        # That only lines up if the kept detections are a prefix of the
        # class's detections (presumably score-sorted) - TODO confirm.
        id_feature = cls_id_feats[i_class - 1]

        if len(dets) > 0:
            detections = [
                STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30,
                       class_id=i_class - 1)
                for (tlbrs, f) in zip(dets[:, :5], id_feature)
            ]
        else:
            detections = []

        # Split this class's tracks into unconfirmed and activated ones.
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks_sp[i_class]:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)

        # Step 2: first association, with embedding.
        strack_pool = joint_stracks(tracked_stracks,
                                    self.lost_stracks_sp[i_class])
        # Predict the current location with the Kalman filter.
        STrack.multi_predict(strack_pool)
        dists = matching.embedding_distance(strack_pool, detections)
        # Motion fusion is skipped for class indices 4, 5 and 7 (the original
        # debug message called them "truck bus motorcycle").
        if i_class not in [4, 5, 7]:
            dists = matching.fuse_motion(self.kalman_filter, dists,
                                         strack_pool, detections)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=emb_thresh[i_class - 1])

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_stracks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        # Step 3: second association, with IoU.
        detections = [detections[i] for i in u_detection]
        r_tracked_stracks = [
            strack_pool[i] for i in u_track
            if strack_pool[i].state == TrackState.Tracked
        ]
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(
            dists, thresh=0.5)

        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_stracks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)

        # Deal with unconfirmed tracks, usually tracks with only one
        # beginning frame.
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(
            dists, thresh=0.7)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_stracks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        # Step 4: init new stracks.
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_stracks.append(track)

        # Step 5: update state.
        for track in self.lost_stracks_sp[i_class]:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)

        self.tracked_stracks_sp[i_class] = [
            t for t in self.tracked_stracks_sp[i_class]
            if t.state == TrackState.Tracked
        ]
        self.tracked_stracks_sp[i_class] = joint_stracks(
            self.tracked_stracks_sp[i_class], activated_stracks)
        self.tracked_stracks_sp[i_class] = joint_stracks(
            self.tracked_stracks_sp[i_class], refind_stracks)
        self.lost_stracks_sp[i_class] = sub_stracks(
            self.lost_stracks_sp[i_class], self.tracked_stracks_sp[i_class])
        self.lost_stracks_sp[i_class].extend(lost_stracks)
        self.lost_stracks_sp[i_class] = sub_stracks(
            self.lost_stracks_sp[i_class], self.removed_stracks_sp[i_class])
        self.removed_stracks_sp[i_class].extend(removed_stracks)
        (self.tracked_stracks_sp[i_class],
         self.lost_stracks_sp[i_class]) = remove_duplicate_stracks(
            self.tracked_stracks_sp[i_class], self.lost_stracks_sp[i_class])

        output_stracks.extend([
            track for track in self.tracked_stracks_sp[i_class]
            if track.is_activated
        ])

    return output_stracks
def update(self, image, tlwhs, det_scores=None):
    """Advance the tracker by one frame using externally supplied boxes.

    Existing tracks are predicted forward, candidate boxes are optionally
    re-scored by ``self.classifier`` and filtered with NMS, re-ID features
    are extracted for the surviving detections, and the detections are then
    associated with tracked, lost and unconfirmed tracks in turn.

    Args:
        image: Current frame, forwarded to the classifier, the re-ID feature
            extractor and the track update calls.
        tlwhs: Detection boxes, one per detection, passed to ``STrack``.
        det_scores: Optional detection scores aligned with ``tlwhs``; when
            ``None`` every detection gets score 1.

    Returns:
        The activated tracks in ``self.tracked_stracks`` followed by recently
        lost tracks (lost for at most 4 frames) whose classifier score
        exceeds 0.3. When ``self.classifier`` is ``None`` no lost tracks are
        returned.
    """
    self.frame_id += 1

    activated_stracks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    # Step 1: prediction.
    for strack in itertools.chain(self.tracked_stracks, self.lost_stracks):
        strack.predict()

    # Step 2: scoring and selection.
    if det_scores is None:
        det_scores = np.ones(len(tlwhs), dtype=float)
    detections = [STrack(tlwh, score, from_det=True)
                  for tlwh, score in zip(tlwhs, det_scores)]

    if self.classifier is None:
        pred_dets = []
    else:
        self.classifier.update(image)

        n_dets = len(tlwhs)
        if self.use_tracking:
            # Add boxes proposed by the activated tracks themselves.
            tracks = [STrack(t.self_tracking(image), t.tracklet_score(), from_det=False)
                      for t in itertools.chain(self.tracked_stracks, self.lost_stracks)
                      if t.is_activated]
            detections.extend(tracks)
        rois = np.asarray([d.tlbr for d in detections], dtype=np.float32)

        cls_scores = self.classifier.predict(rois)
        # np.float / np.bool were removed in NumPy 1.24; the builtin types
        # select the same dtypes (float64 / bool_).
        scores = np.asarray([d.score for d in detections], dtype=float)
        # Real detections are weighted purely by the classifier score.
        scores[0:n_dets] = 1.
        scores = scores * cls_scores
        # nms
        if len(detections) > 0:
            keep = nms_detections(rois, scores.reshape(-1), nms_thresh=0.3)
            mask = np.zeros(len(rois), dtype=bool)
            mask[keep] = True
            keep = np.where(mask & (scores >= self.min_cls_score))[0]
            detections = [detections[i] for i in keep]
            scores = scores[keep]
            for d, score in zip(detections, scores):
                d.score = score
        pred_dets = [d for d in detections if not d.from_det]
        detections = [d for d in detections if d.from_det]

    # Set re-ID features on the real detections.
    tlbrs = [det.tlbr for det in detections]
    features = extract_reid_features(self.reid_model, image, tlbrs)
    features = features.cpu().numpy()
    for i, det in enumerate(detections):
        det.set_feature(features[i])

    # Step 3: association for tracked.
    # matching for tracked targets
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    dists = matching.nearest_reid_distance(tracked_stracks, detections, metric='euclidean')
    dists = matching.gate_cost_matrix(self.kalman_filter, dists, tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.min_ap_dist)
    for itracked, idet in matches:
        tracked_stracks[itracked].update(detections[idet], self.frame_id, image)

    # matching for missing targets
    detections = [detections[i] for i in u_detection]
    dists = matching.nearest_reid_distance(self.lost_stracks, detections, metric='euclidean')
    dists = matching.gate_cost_matrix(self.kalman_filter, dists, self.lost_stracks, detections)
    matches, _, u_detection = matching.linear_assignment(dists, thresh=self.min_ap_dist)
    for ilost, idet in matches:
        track = self.lost_stracks[ilost]  # type: STrack
        det = detections[idet]
        track.re_activate(det, self.frame_id, image, new_id=not self.use_refind)
        refind_stracks.append(track)

    # remaining tracked: IoU matching against the leftover detections plus
    # the track-proposed boxes.
    len_det = len(u_detection)
    detections = [detections[i] for i in u_detection] + pred_dets
    r_tracked_stracks = [tracked_stracks[i] for i in u_track]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        r_tracked_stracks[itracked].update(detections[idet], self.frame_id, image, update_feature=True)
    for it in u_track:
        track = r_tracked_stracks[it]
        track.mark_lost()
        lost_stracks.append(track)

    # unconfirmed: indices >= len_det are the appended track-proposed boxes,
    # so only the real leftover detections are kept here.
    detections = [detections[i] for i in u_detection if i < len_det]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id, image, update_feature=True)
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    # Step 4: init new stracks.
    for inew in u_detection:
        track = detections[inew]
        if not track.from_det or track.score < 0.6:
            continue
        track.activate(self.kalman_filter, self.frame_id, image)
        activated_stracks.append(track)

    # Step 5: update state.
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
    self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost]  # type: list[STrack]

    self.tracked_stracks.extend(activated_stracks)
    self.tracked_stracks.extend(refind_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.removed_stracks.extend(removed_stracks)

    # Get scores of lost tracks. Without a classifier they cannot be
    # re-scored, so none are reported (previously this raised AttributeError
    # although the method accepts classifier=None above).
    if self.classifier is None:
        out_lost_stracks = []
    else:
        rois = np.asarray([t.tlbr for t in self.lost_stracks], dtype=np.float32)
        lost_cls_scores = self.classifier.predict(rois)
        out_lost_stracks = [t for i, t in enumerate(self.lost_stracks)
                            if lost_cls_scores[i] > 0.3 and self.frame_id - t.end_frame <= 4]
    output_tracked_stracks = [track for track in self.tracked_stracks if track.is_activated]

    output_stracks = output_tracked_stracks + out_lost_stracks

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([track.track_id for track in activated_stracks]))
    logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    return output_stracks
def update(self, im_blob, img0):
    """Advance the tracker by one frame and return the activated tracks.

    The frame is run through ``self.model``; decoded detections and their
    re-ID embeddings are associated with existing tracks in three passes
    (embedding distance fused with motion, IoU against the remaining tracked
    tracks, IoU against unconfirmed tracks). Unmatched detections that score
    high enough start new tracks, and the tracker's ``tracked_stracks`` /
    ``lost_stracks`` / ``removed_stracks`` lists are updated in place.

    Args:
        im_blob: Network input; ``shape[2]`` and ``shape[3]`` are read as the
            input height and width.
        img0: Original frame; ``shape[0]`` / ``shape[1]`` are read as its
            height / width.

    Returns:
        The tracks in ``self.tracked_stracks`` whose ``is_activated`` flag
        is set.
    """
    self.frame_id += 1

    # Per-frame bookkeeping, merged into the tracker state at the end.
    now_active = []
    now_refound = []
    now_lost = []
    now_removed = []

    height, width = img0.shape[0], img0.shape[1]
    inp_height, inp_width = im_blob.shape[2], im_blob.shape[3]
    center = np.array([width / 2., height / 2.], dtype=np.float32)
    scale = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': center,
        's': scale,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio,
    }

    # Step 1: network forward, get detections & embeddings.
    # The head outputs are read from the model, post-processed and filtered
    # by confidence; the survivors become candidate tracks.
    with torch.no_grad():
        output = self.model(im_blob)[-1]            # detection network result
        heatmap = output['hm'].sigmoid_()           # heat map head
        size_map = output['wh']                     # box width/height head
        embeddings = F.normalize(output['id'], dim=1)   # re-ID feature head
        # Centre-offset head, only when enabled in the options.
        offsets = output['reg'] if self.opt.reg_offset else None

        # Decoded detections plus the indices used to pick their features.
        dets, inds = mot_decode(heatmap, size_map, reg=offsets,
                                cat_spec_wh=self.opt.cat_spec_wh,
                                K=self.opt.K)
        # Select the re-ID features at those indices, drop the leading
        # dimension and move the result to NumPy.
        embeddings = _tranpose_and_gather_feat(embeddings, inds)
        embeddings = embeddings.squeeze(0).cpu().numpy()

    # Post-process the detections.
    dets = self.merge_outputs([self.post_process(dets, meta)])[1]

    # Confidence filtering of detections and their re-ID features.
    keep_mask = dets[:, 4] > self.opt.conf_thres
    dets = dets[keep_mask]
    embeddings = embeddings[keep_mask]

    # Wrap every detection in a track object carrying its box, score and
    # feature.
    detections = [
        STrack(STrack.tlbr_to_tlwh(row[:4]), row[4], feat, 30)
        for row, feat in zip(dets[:, :5], embeddings)
    ] if len(dets) > 0 else []

    # Split the current tracks into not-yet-activated and activated ones.
    unconfirmed = [t for t in self.tracked_stracks if not t.is_activated]
    confirmed = [t for t in self.tracked_stracks if t.is_activated]

    # Step 2: first association, with embedding.
    # (Translated from the original author's notes.)
    #   1. Merge the activated and lost tracks into one pool.
    #   2. Compute the appearance cost between pool and detections from the
    #      features (cosine distance).
    #   3. Predict the pool's new mean/covariance with the Kalman filter.
    #   4. Fuse in the motion distance; entries beyond the distance gate are
    #      set to inf.
    #   5. Solve the assignment (LAPJV is used instead of Munkres).
    #      a. matched, track is Tracked: update feature and Kalman state,
    #         record as activated;
    #         matched, track is not Tracked: re-activate, record as refound.
    #      b. unmatched: carried over to the next pass.
    pool = joint_stracks(confirmed, self.lost_stracks)
    STrack.multi_predict(pool)
    cost = matching.embedding_distance(pool, detections)
    cost = matching.fuse_motion(self.kalman_filter, cost, pool, detections)
    # matches holds (track index, detection index) pairs; u_track and
    # u_detection hold the unmatched indices on either side.
    matches, u_track, u_detection = matching.linear_assignment(cost, thresh=0.7)

    for track_idx, det_idx in matches:
        track = pool[track_idx]
        det = detections[det_idx]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            now_active.append(track)
        else:
            # The track came from the lost list: bring it back.
            track.re_activate(det, self.frame_id, new_id=False)
            now_refound.append(track)

    # Step 3: second association, with IoU.
    # Detections and Tracked-state tracks left over from the embedding pass
    # are matched on IoU. Tracks that still find no detection are marked
    # lost; leftover detections move on to the next pass.
    detections = [detections[i] for i in u_detection]
    leftover_tracked = [
        pool[i] for i in u_track if pool[i].state == TrackState.Tracked
    ]
    cost = matching.iou_distance(leftover_tracked, detections)
    matches, u_track, u_detection = matching.linear_assignment(cost, thresh=0.5)

    for track_idx, det_idx in matches:
        track = leftover_tracked[track_idx]
        det = detections[det_idx]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            now_active.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            now_refound.append(track)

    for idx in u_track:
        track = leftover_tracked[idx]
        if track.state != TrackState.Lost:
            track.mark_lost()
            now_lost.append(track)

    # Deal with unconfirmed tracks, usually tracks with only one beginning
    # frame. They are matched on IoU against the detections still left;
    # matched ones are updated and recorded as activated, unmatched ones are
    # removed, and unmatched detections move on to step 4.
    detections = [detections[i] for i in u_detection]
    cost = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(cost, thresh=0.7)
    for track_idx, det_idx in matches:
        track = unconfirmed[track_idx]
        track.update(detections[det_idx], self.frame_id)
        now_active.append(track)
    for idx in u_unconfirmed:
        track = unconfirmed[idx]
        track.mark_removed()
        now_removed.append(track)

    # Step 4: init new stracks from detections that matched nothing in any
    # of the three passes.
    for idx in u_detection:
        candidate = detections[idx]
        if candidate.score < self.det_thresh:
            continue
        candidate.activate(self.kalman_filter, self.frame_id)
        now_active.append(candidate)

    # Step 5: update state. Tracks lost for more than max_time_lost frames
    # are removed.
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            now_removed.append(track)

    # Keep only Tracked-state tracks, then add the newly activated and the
    # refound ones.
    tracked = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
    tracked = joint_stracks(tracked, now_active)
    tracked = joint_stracks(tracked, now_refound)
    lost = sub_stracks(self.lost_stracks, tracked)
    lost.extend(now_lost)
    lost = sub_stracks(lost, self.removed_stracks)
    self.removed_stracks.extend(now_removed)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(tracked, lost)

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([t.track_id for t in now_active]))
    logger.debug('Refind: {}'.format([t.track_id for t in now_refound]))
    logger.debug('Lost: {}'.format([t.track_id for t in now_lost]))
    logger.debug('Removed: {}'.format([t.track_id for t in now_removed]))

    return [t for t in self.tracked_stracks if t.is_activated]
def update_tracking(self, img, img0):
    """
    Run detection on one frame and update the per-class track lists.

    :param img: network input; unpacked as a 4-D ``(b, c, h, w)`` shape below
    :param img0: image whose ``shape`` is passed to ``scale_coords`` as the
        rescale target (presumably the original, un-resized frame - TODO confirm)
    :return: ``defaultdict`` mapping class id -> list of tracks of that class that
        are in the tracked list and have ``is_activated`` set; ``None`` when the
        detector returns no detections (``self.frame_id`` has already been
        incremented in that case and no track state is touched)
    """
    # update frame id
    self.frame_id += 1

    # record tracking states of this frame, keyed by class id
    activated_tracks_dict = defaultdict(list)
    refined_tracks_dict = defaultdict(list)
    lost_tracks_dict = defaultdict(list)
    removed_tracks_dict = defaultdict(list)
    output_tracks_dict = defaultdict(list)

    # ----- do detection and reid feature extraction
    # only get aggregated result, not original YOLO output
    with torch.no_grad():
        pred, _pred_orig, reid_feat_out, yolo_ids = self.model.forward(img, augment=self.opt.augment)
        pred = pred.float()

        # L2 normalize feature map
        reid_feat_out[0] = F.normalize(reid_feat_out[0], dim=1)

        # apply NMS
        pred, pred_yolo_ids = non_max_suppression_with_yolo_inds(pred,
                                                                 yolo_ids,
                                                                 self.opt.conf_thres,
                                                                 self.opt.iou_thres,
                                                                 merge=False,
                                                                 classes=self.opt.classes,
                                                                 agnostic=self.opt.agnostic_nms)

        dets = pred[0]  # assume batch_size == 1 here

        # Bail out BEFORE touching pred_yolo_ids[0]: with no detections that entry
        # is presumably None as well (TODO confirm against the NMS helper), and
        # calling a tensor method on it would raise instead of returning None.
        if dets is None:
            print('[Warning]: no objects detected.')
            return None

        # Flatten to 1-D. squeeze() would turn a single detection into a 0-d
        # tensor, which cannot be iterated by the zip() below.
        dets_yolo_ids = pred_yolo_ids[0].reshape(-1)

        # Get reid feature vector for each detection
        _, _, h, w = img.shape  # net input img size
        id_vects_dict = defaultdict(list)
        for det, yolo_id in zip(dets, dets_yolo_ids):
            x1, y1, x2, y2, _conf, cls_id = det

            # get reid map for this bbox(corresponding yolo idx)
            reid_feat_map = reid_feat_out[yolo_id]
            feat_b, _reid_dim, h_id_map, w_id_map = reid_feat_map.shape
            assert feat_b == 1  # make sure batch size is 1

            # map center point from net scale to feature map scale
            center_x = (x1 + x2) * 0.5
            center_y = (y1 + y2) * 0.5
            center_x *= float(w_id_map) / float(w)
            center_y *= float(h_id_map) / float(h)

            # add 0.5 then truncate == round to nearest, then convert to int64 for indexing
            center_x += 0.5
            center_y += 0.5
            center_x = center_x.long()
            center_y = center_y.long()
            center_x.clamp_(0, w_id_map - 1)  # avoid out of reid feature map's range
            center_y.clamp_(0, h_id_map - 1)

            id_feat_vect = reid_feat_map[0, :, center_y, center_x]
            id_feat_vect = id_feat_vect.squeeze()

            # L2 normalize the feature vector
            id_feat_vect = F.normalize(id_feat_vect, dim=0)

            id_feat_vect = id_feat_vect.cpu().numpy()
            id_vects_dict[int(cls_id)].append(id_feat_vect)  # put feat vect to dict(key: cls_id)

    # Rescale boxes from img_size to img0 size(from net input size to original size)
    dets[:, :4] = scale_coords(img.shape[2:], dets[:, :4], img0.shape).round()

    # Process each object class
    for cls_id in range(self.opt.num_classes):
        cls_inds = torch.where(dets[:, -1] == cls_id)
        cls_dets = dets[cls_inds]  # n_objs x 6
        cls_id_feature = id_vects_dict[cls_id]  # one feature vector per detection of this class

        cls_dets = cls_dets.detach().cpu().numpy()
        cls_id_feature = np.array(cls_id_feature)
        if len(cls_dets) > 0:
            # Detections of the current frame; tlbrs: top left bottom right score
            cls_detections = [Track(Track.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], feat, buff_size=30)
                              for (tlbrs, feat) in zip(cls_dets[:, :5], cls_id_feature)]
        else:
            cls_detections = []

        # reset the track ids for a different object class in the first frame
        if self.frame_id == 1:
            for track in cls_detections:
                track.reset_track_id()

        # Split this class's tracked list into unconfirmed (not yet activated)
        # and confirmed tracks
        unconfirmed = []
        tracked_tracks = []
        for track in self.tracked_tracks_dict[cls_id]:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_tracks.append(track)

        # Step 2: First association, with embedding
        # build track pool for the current frame by joining tracked tracks and lost tracks
        track_pool = join_tracks(tracked_tracks, self.lost_tracks_dict[cls_id])

        # kalman predict for the whole track pool
        Track.multi_predict(track_pool)

        dists = matching.embedding_distance(track_pool, cls_detections)
        dists = matching.fuse_motion(self.kalman_filter, dists, track_pool, cls_detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)

        # process matched pairs between track pool and current frame detection
        for i_tracked, i_det in matches:
            track = track_pool[i_tracked]
            det = cls_detections[i_det]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracks_dict[cls_id].append(track)
            else:  # re-activate the lost track
                track.re_activate(det, self.frame_id, new_id=False)
                refined_tracks_dict[cls_id].append(track)

        # Step 3: Second association, with IOU
        # match between still-tracked unmatched tracks and unmatched detections
        cls_detections = [cls_detections[i] for i in u_detection]
        r_tracked_tracks = [track_pool[i] for i in u_track
                            if track_pool[i].state == TrackState.Tracked]
        dists = matching.iou_distance(r_tracked_tracks, cls_detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)

        for i_tracked, i_det in matches:  # process matched tracks
            track = r_tracked_tracks[i_tracked]
            det = cls_detections[i_det]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_tracks_dict[cls_id].append(track)
            else:
                # defensive branch: r_tracked_tracks was filtered to Tracked state above
                track.re_activate(det, self.frame_id, new_id=False)
                refined_tracks_dict[cls_id].append(track)

        for it in u_track:  # tracks left unmatched after two rounds
            track = r_tracked_tracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_tracks_dict[cls_id].append(track)

        # Deal with unconfirmed tracks, usually tracks with only one beginning frame
        cls_detections = [cls_detections[i] for i in u_detection]  # still-unmatched detections
        dists = matching.iou_distance(unconfirmed, cls_detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
        for i_tracked, i_det in matches:
            unconfirmed[i_tracked].update(cls_detections[i_det], self.frame_id)
            activated_tracks_dict[cls_id].append(unconfirmed[i_tracked])

        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_tracks_dict[cls_id].append(track)

        # Step 4: Init new tracks from detections that matched nothing
        for i_new in u_detection:
            track = cls_detections[i_new]
            if track.score < self.det_thresh:
                continue

            track.activate(self.kalman_filter, self.frame_id)
            activated_tracks_dict[cls_id].append(track)

        # Step 5: Update state
        # drop lost tracks that have been lost for more than max_time_lost frames
        for track in self.lost_tracks_dict[cls_id]:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_tracks_dict[cls_id].append(track)

        self.tracked_tracks_dict[cls_id] = [t for t in self.tracked_tracks_dict[cls_id]
                                            if t.state == TrackState.Tracked]
        self.tracked_tracks_dict[cls_id] = join_tracks(self.tracked_tracks_dict[cls_id],
                                                       activated_tracks_dict[cls_id])  # add activated tracks
        self.tracked_tracks_dict[cls_id] = join_tracks(self.tracked_tracks_dict[cls_id],
                                                       refined_tracks_dict[cls_id])  # add re-found tracks
        self.lost_tracks_dict[cls_id] = sub_tracks(self.lost_tracks_dict[cls_id],
                                                   self.tracked_tracks_dict[cls_id])  # update lost tracks
        self.lost_tracks_dict[cls_id].extend(lost_tracks_dict[cls_id])
        # NOTE(review): the lost list is filtered against the removed list BEFORE
        # this frame's removals are appended to it; order kept as in the original.
        self.lost_tracks_dict[cls_id] = sub_tracks(self.lost_tracks_dict[cls_id],
                                                   self.removed_tracks_dict[cls_id])
        self.removed_tracks_dict[cls_id].extend(removed_tracks_dict[cls_id])
        self.tracked_tracks_dict[cls_id], self.lost_tracks_dict[cls_id] = remove_duplicate_tracks(
            self.tracked_tracks_dict[cls_id],
            self.lost_tracks_dict[cls_id])

        # only activated tracks are reported for this class
        output_tracks_dict[cls_id] = [track for track in self.tracked_tracks_dict[cls_id]
                                      if track.is_activated]

    return output_tracks_dict
def _update(self, detections):
    """
    Associate this frame's detections with existing tracks and refresh the
    tracked / lost / removed lists on ``self``.

    The cascade is: embedding distance fused with motion against tracked + lost
    tracks, then IOU against the still-tracked leftovers, then IOU against
    unconfirmed tracks; detections that match nothing start new tracks when
    their score reaches ``self.det_thresh``.

    :param detections: candidate track objects for the current frame
    :return: tuple ``(output_stracks, activated, self.lost_stracks)`` where
        ``output_stracks`` are the tracked tracks with ``is_activated`` set and
        ``activated`` are the tracks updated or started in this call
    """
    fresh = []      # tracks updated or newly started in this call
    refound = []    # lost tracks matched again in this call
    gone_lost = []  # tracks that went unmatched in this call
    dropped = []    # tracks marked removed in this call

    def _absorb(trk, det):
        # A matched pair either updates a live track or revives a lost one.
        if trk.state == TrackState.Tracked:
            trk.update(det, self.frame_id)
            fresh.append(trk)
        else:
            trk.re_activate(det, self.frame_id, new_id=False)
            refound.append(trk)

    # Separate activated tracks from unconfirmed (not yet activated) ones.
    confirmed = []  # type: list[STrack]
    unconfirmed = []
    for trk in self.tracked_stracks:
        if trk.is_activated:
            confirmed.append(trk)
        else:
            unconfirmed.append(trk)

    # Step 2: first association, with embedding
    pool = joint_stracks(confirmed, self.lost_stracks)
    STrack.multi_predict(pool)  # predict the current location with KF
    cost = matching.embedding_distance(pool, detections)
    cost = matching.fuse_motion(self.kalman_filter, cost, pool, detections)
    pairs, free_tracks, free_dets = matching.linear_assignment(cost, thresh=0.7)
    for t_idx, d_idx in pairs:
        _absorb(pool[t_idx], detections[d_idx])

    # Step 3: second association, with IOU
    leftover_dets = [detections[k] for k in free_dets]
    still_tracked = [
        pool[k] for k in free_tracks if pool[k].state == TrackState.Tracked
    ]
    cost = matching.iou_distance(still_tracked, leftover_dets)
    pairs, free_tracks, free_dets = matching.linear_assignment(cost, thresh=0.5)
    for t_idx, d_idx in pairs:
        _absorb(still_tracked[t_idx], leftover_dets[d_idx])
    for t_idx in free_tracks:
        trk = still_tracked[t_idx]
        if not trk.state == TrackState.Lost:
            trk.mark_lost()
            gone_lost.append(trk)

    # Deal with unconfirmed tracks, usually tracks with only one beginning frame
    leftover_dets = [leftover_dets[k] for k in free_dets]
    cost = matching.iou_distance(unconfirmed, leftover_dets)
    pairs, free_unconfirmed, free_dets = matching.linear_assignment(
        cost, thresh=0.7)
    for t_idx, d_idx in pairs:
        trk = unconfirmed[t_idx]
        trk.update(leftover_dets[d_idx], self.frame_id)
        fresh.append(trk)
    for t_idx in free_unconfirmed:
        trk = unconfirmed[t_idx]
        trk.mark_removed()
        dropped.append(trk)

    # Step 4: init new stracks
    for d_idx in free_dets:
        candidate = leftover_dets[d_idx]
        if not candidate.score < self.det_thresh:
            candidate.activate(self.kalman_filter, self.frame_id)
            fresh.append(candidate)

    # Step 5: update state
    for trk in self.lost_stracks:
        timed_out = self.frame_id - trk.end_frame > self.max_time_lost
        # merge_tracks is only consulted for timed-out tracks
        if timed_out and not self.merge_tracks(trk):
            trk.mark_removed()
            dropped.append(trk)

    tracked_now = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    tracked_now = joint_stracks(tracked_now, fresh)
    self.tracked_stracks = joint_stracks(tracked_now, refound)

    lost_now = sub_stracks(self.lost_stracks, self.tracked_stracks)
    lost_now.extend(gone_lost)
    self.lost_stracks = sub_stracks(lost_now, self.removed_stracks)
    self.removed_stracks.extend(dropped)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)

    # only activated tracks are reported
    output_stracks = [trk for trk in self.tracked_stracks if trk.is_activated]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([trk.track_id for trk in fresh]))
    logger.debug('Refind: {}'.format([trk.track_id for trk in refound]))
    logger.debug('Lost: {}'.format([trk.track_id for trk in gone_lost]))
    logger.debug('Removed: {}'.format([trk.track_id for trk in dropped]))

    return output_stracks, fresh, self.lost_stracks