def predict(self, img0):  # img0 = cv2.imread(img_path), BGR
    img, _, _, _ = letterbox(img0, height=640, width=640)

    # Normalize RGB
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img, dtype=np.float32)
    img /= 255.0
    im_blob = torch.from_numpy(img).unsqueeze(0).to(self.device)

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // down_ratio,  # down_ratio is assumed to be a module-level constant (typically 4)
        'out_width': inp_width // down_ratio
    }

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)

        reg = output['reg']
        dets, inds = mot_decode(hm, wh, reg=reg, ltrb=True, K=500)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    dets = post_process(dets, meta)
    dets = merge_outputs([dets])[1]

    remain_inds = dets[:, 4] > self.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]

    res = []
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        bbox = [
            int(min(bbox[0], bbox[2])),
            int(min(bbox[1], bbox[3])),
            int(max(bbox[0], bbox[2])),
            int(max(bbox[1], bbox[3])),
        ]
        res.append({
            "reid": id_feature[i],
            "bbox": bbox,
        })
    return res
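# predict() above calls letterbox() without defining it. For reference, a
# minimal sketch consistent with the usual JDE/FairMOT letterbox helper
# (hedged: this repo's version may differ in padding color or rounding).
# It resizes while keeping aspect ratio, pads to the target size, and
# returns the padded image plus the resize ratio and the x/y padding,
# matching the 4-tuple unpacked in predict().
import cv2

def letterbox(img, height=608, width=1088, color=(127.5, 127.5, 127.5)):
    # resize a rectangular image to a padded rectangular canvas
    shape = img.shape[:2]  # [height, width]
    ratio = min(float(height) / shape[0], float(width) / shape[1])
    new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))  # (width, height)
    dw = (width - new_shape[0]) / 2   # width padding
    dh = (height - new_shape[1]) / 2  # height padding
    top, bottom = round(dh - 0.1), round(dh + 0.1)
    left, right = round(dw - 0.1), round(dw + 0.1)
    img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA)  # resized, no border
    img = cv2.copyMakeBorder(img, top, bottom, left, right,
                             cv2.BORDER_CONSTANT, value=color)  # padded rectangular
    return img, ratio, dw, dh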
def save_result(self, output, batch, results):
    reg = output['reg'] if self.opt.reg_offset else None
    dets = mot_decode(
        output['hm'], output['wh'], reg=reg,
        cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets_out = ctdet_post_process(
        dets.copy(), batch['meta']['c'].cpu().numpy(),
        batch['meta']['s'].cpu().numpy(),
        output['hm'].shape[2], output['hm'].shape[3], output['hm'].shape[1])
    results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0]
def detect(self, im_blob, img0):
    self.frame_id += 1
    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio
    }

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)

    dets = self.post_process(dets, meta)
    # print(dets[1].shape, dets[2].shape, dets[3].shape, dets[4].shape)
    dets = self.merge_outputs([dets])  # [1]

    # Merge all classes into one big array, appending the class id as a sixth column
    dets = np.concatenate([
        np.hstack((dets[i], i * np.ones((dets[i].shape[0], 1))))
        for i in range(1, self.opt.num_classes + 1)
    ], axis=0)

    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    return dets
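# A small, self-contained illustration (with hypothetical toy shapes) of the
# class-merge step in detect() above: per-class (Ni, 5) arrays of
# [x1, y1, x2, y2, score] are tagged with a class-id column and stacked into
# one (sum(Ni), 6) array.
import numpy as np

dets_by_class = {1: np.zeros((3, 5)), 2: np.ones((2, 5))}  # toy stand-in for merge_outputs(...)
num_classes = 2
merged = np.concatenate([
    np.hstack((dets_by_class[i], i * np.ones((dets_by_class[i].shape[0], 1))))
    for i in range(1, num_classes + 1)
], axis=0)
print(merged.shape)  # (5, 6): column 5 now holds the class id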
def detect(self, im_blob, img0):
    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {'c': c, 's': s,
            'out_height': inp_height // self.opt.down_ratio,
            'out_width': inp_width // self.opt.down_ratio}

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg, ltrb=self.opt.ltrb, K=self.opt.K)

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]
    remain_inds = dets[:, 4] > self.opt.conf_thres
    # format tlbr
    dets = dets[remain_inds]
    return dets
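# detect() returns boxes in tlbr format (x1, y1, x2, y2, score). For
# reference, a sketch of the conversion performed by STrack.tlbr_to_tlwh in
# the update() variants below, consistent with the standard FairMOT helper
# (hedged: the repo's STrack may implement it slightly differently):
import numpy as np

def tlbr_to_tlwh(tlbr):
    ret = np.asarray(tlbr, dtype=np.float64).copy()
    ret[2:] -= ret[:2]  # (x1, y1, x2, y2) -> (x1, y1, w, h)
    return ret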
def update(self, im_blob, img0):
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []
    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {'c': c, 's': s,
            'out_height': inp_height // self.opt.down_ratio,
            'out_width': inp_width // self.opt.down_ratio}

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)

        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg, ltrb=self.opt.ltrb, K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]

    # vis
    '''
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
    cv2.imshow('dets', img0)
    cv2.waitKey(0)
    id0 = id0-1
    '''

    if len(dets) > 0:
        '''Detections'''
        detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
                      for (tlbrs, f) in zip(dets[:, :5], id_feature)]
    else:
        detections = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding'''
    # Join the track lists into one pool
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with KF
    #for strack in strack_pool:
    #    strack.predict()
    # Propagate the joint mean and covariance of each Kalman tracker
    STrack.multi_predict(strack_pool)
    # Get the cost matrix between tracks and dets
    dists = matching.embedding_distance(strack_pool, detections)
    #dists = matching.iou_distance(strack_pool, detections)
    # If a track/detection pair is too far from the Kalman filter prediction,
    # assign infinite cost, i.e. update the cost matrix with the Kalman filter
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    # Find the optimal assignment using the cost matrix.
    # u_track and u_detection are the unmatched tracks and detections respectively
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.4)
    # Update the currently tracked tracks with the matches found
    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU'''
    detections = [detections[i] for i in u_detection]
    # Get the tracks that were not matched above but are still in tracked state
    r_tracked_stracks = [strack_pool[i] for i in u_track
                         if strack_pool[i].state == TrackState.Tracked]
    # Get the cost matrix
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.6)  # default 0.5
    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)
    # Mark all of the remaining unmatched tracks as lost
    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
    # For the unconfirmed tracks (tracks with only one beginning frame),
    # use the remaining detections to try to pair them
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    # Add the matched ones
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    # Remove the ones that couldn't be matched
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        # If disappeared for more than max_time_lost frames, remove the track
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    # print('Remained match {} s'.format(t4-t3))

    self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
    self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
    # get scores of lost tracks
    output_stracks = [track for track in self.tracked_stracks if track.is_activated]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    return output_stracks
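# Step 5 above relies on list bookkeeping helpers that are never defined in
# these snippets. Sketches consistent with the usual FairMOT/JDE
# implementations (hedged: the tracker module in this repo may differ in
# detail): joint_stracks unions two track lists by track_id, keeping the
# first list's entry on collision; sub_stracks removes the second list's
# track_ids from the first.

def joint_stracks(tlista, tlistb):
    exists = {}
    res = []
    for t in tlista:
        exists[t.track_id] = 1
        res.append(t)
    for t in tlistb:
        if not exists.get(t.track_id, 0):
            exists[t.track_id] = 1
            res.append(t)
    return res

def sub_stracks(tlista, tlistb):
    stracks = {t.track_id: t for t in tlista}
    for t in tlistb:
        stracks.pop(t.track_id, None)  # drop ids present in tlistb
    return list(stracks.values())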
def eval_seq(opt, dataloader, data_type, result_filename,
             save_dir=None, show_image=True, frame_rate=30):
    if save_dir:
        mkdir_if_missing(save_dir)
    if opt.gpus[0] >= 0:
        opt.device = 'gpu'
    else:
        opt.device = 'cpu'
    paddle.set_device(opt.device)

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    model = load_model(model, opt.load_model)
    # model = torch.nn.DataParallel(model)
    # model = model.to(opt.device)
    model.eval()

    timer = Timer()
    results = []
    frame_id = 0
    for path, img, img0 in dataloader:
        if frame_id % 20 == 0:
            logger.info('Processing frame {} ({:.2f} fps)'.format(
                frame_id, 1. / max(1e-5, timer.average_time)))

        # run detection
        timer.tic()
        # blob = torch.from_numpy(img).cuda().unsqueeze(0)
        blob = paddle.to_tensor(img).unsqueeze(0)
        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = blob.shape[2]
        inp_width = blob.shape[3]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // opt.down_ratio,
            'out_width': inp_width // opt.down_ratio
        }

        # with torch.no_grad():
        with paddle.no_grad():  # paddle.no_grad() is the paddle counterpart of torch.no_grad(); clear_grad() is not a context manager
            output = model(blob)[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            reg = output['reg'] if opt.reg_offset else None
            dets, inds = mot_decode(hm, wh, reg=reg, ltrb=opt.ltrb, K=opt.K)

        dets = post_process(opt, dets, meta)
        dets = merge_outputs(opt, [dets])[1]
        dets = dets[dets[:, 4] > 0.1]
        dets[:, :4] = tlbr2tlwh(dets[:, :4])

        tlwhs = []
        scores = []
        for *tlwh, conf in dets:
            tlwhs.append(tlwh)
            scores.append(conf)
        timer.toc()
        # save results
        results.append((frame_id + 1, tlwhs, scores))
        frame_id += 1

    # save results
    write_results_score(result_filename, results)
    #write_results_score_hie(result_filename, results, data_type)
    return frame_id, timer.average_time, timer.calls
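# eval_seq() converts decoded boxes with tlbr2tlwh before writing results. A
# vectorized sketch of that conversion, assuming rows of [x1, y1, x2, y2]
# (hedged: the repo's own helper may operate in place or on tensors):
import numpy as np

def tlbr2tlwh(tlbr):
    tlwh = np.asarray(tlbr, dtype=np.float32).copy()
    tlwh[:, 2] -= tlwh[:, 0]  # w = x2 - x1
    tlwh[:, 3] -= tlwh[:, 1]  # h = y2 - y1
    return tlwh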
width = origin_shape[1]
height = origin_shape[0]
inp_height = 608
inp_width = 1088
c = np.array([width / 2., height / 2.], dtype=np.float32)
s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
meta = {'c': c, 's': s,
        'out_height': inp_height // opt.down_ratio,
        'out_width': inp_width // opt.down_ratio}

hm = output['hm']
hm_sig = output['hm'].sigmoid_()
hm_sig_soft_nms = soft_nms(hm_sig.detach().cpu(), 3, 3, 21, 21, thresh=THRESH)
wh = output['wh']
reg = output['reg'] if opt.reg_offset else None

opt.K = 200
detections, inds = mot_decode(hm_sig, wh, reg=reg,
                              cat_spec_wh=opt.cat_spec_wh, thresh=THRESH, K=opt.K)
dets = post_process(opt, detections, meta)
dets = merge_outputs(opt, [dets])[1]

remain_inds = dets[:, 4] > THRESH
dets = dets[remain_inds]
remain_inds = (dets[:, 2] - dets[:, 0]) * (dets[:, 3] - dets[:, 1]) > opt.min_box_area
dets = dets[remain_inds]

rects = []
cmap = get_cmap(dets.shape[0])
for i, det in enumerate(dets):
    col = cmap(i)
    rect = plt.Rectangle((det[0], det[1]), det[2] - det[0], det[3] - det[1],
                         fill=False, edgecolor=col, linewidth=1)
    ax12.add_patch(rect)

# hm_nms = (hm_nms - hm_nms.mean())/hm_nms.std()
im11 = ax11.imshow(hm.detach().cpu().squeeze())
def test_det(
        opt,
        batch_size=12,
        img_size=(1088, 608),
        iou_thres=0.5,
        print_interval=40,
):
    data_cfg = opt.data_cfg
    f = open(data_cfg)
    data_cfg_dict = json.load(f)
    f.close()
    nC = 1
    test_path = data_cfg_dict['test']
    dataset_root = data_cfg_dict['root']
    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    model = load_model(model, opt.load_model)
    #model = torch.nn.DataParallel(model)
    model = model.to(opt.device)
    model.eval()

    # Get dataloader
    transforms = T.Compose([T.ToTensor()])
    dataset = DetDataset(dataset_root, test_path, img_size,
                         augment=False, transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                             shuffle=False, num_workers=8,
                                             drop_last=False, collate_fn=collate_fn)

    mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    outputs, mAPs, mR, mP, TP, confidence, pred_class, target_class, jdict = \
        [], [], [], [], [], [], [], [], []
    AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)
    for batch_i, (imgs, targets, paths, shapes, targets_len) in enumerate(dataloader):
        t = time.time()
        #seen += batch_size
        output = model(imgs.cuda())[-1]
        origin_shape = shapes[0]
        width = origin_shape[1]
        height = origin_shape[0]
        inp_height = img_size[1]
        inp_width = img_size[0]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // opt.down_ratio,
            'out_width': inp_width // opt.down_ratio
        }
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = output['reg'] if opt.reg_offset else None

        opt.K = 200
        detections, inds = mot_decode(hm, wh, reg=reg,
                                      cat_spec_wh=opt.cat_spec_wh, K=opt.K)

        # Compute average precision for each sample
        targets = [targets[i][:int(l)] for i, l in enumerate(targets_len)]
        for si, labels in enumerate(targets):
            seen += 1
            #path = paths[si]
            #img0 = cv2.imread(path)
            dets = detections[si]
            dets = dets.unsqueeze(0)
            dets = post_process(opt, dets, meta)
            dets = merge_outputs(opt, [dets])[1]
            #remain_inds = dets[:, 4] > opt.det_thres
            #dets = dets[remain_inds]
            if dets is None:
                # If there are labels but no detections, mark as zero AP
                if labels.size(0) != 0:
                    mAPs.append(0), mR.append(0), mP.append(0)
                continue

            # If there are no labels, add the number of detections as incorrect
            correct = []
            if labels.size(0) == 0:
                # correct.extend([0 for _ in range(len(detections))])
                mAPs.append(0), mR.append(0), mP.append(0)
                continue
            else:
                target_cls = labels[:, 0]

                # Extract target boxes as (x1, y1, x2, y2)
                target_boxes = xywh2xyxy(labels[:, 2:6])
                target_boxes[:, 0] *= width
                target_boxes[:, 2] *= width
                target_boxes[:, 1] *= height
                target_boxes[:, 3] *= height
                '''
                path = paths[si]
                img0 = cv2.imread(path)
                img1 = cv2.imread(path)
                for t in range(len(target_boxes)):
                    x1 = target_boxes[t, 0]
                    y1 = target_boxes[t, 1]
                    x2 = target_boxes[t, 2]
                    y2 = target_boxes[t, 3]
                    cv2.rectangle(img0, (x1, y1), (x2, y2), (0, 255, 0), 4)
                cv2.imwrite('gt.jpg', img0)
                for t in range(len(dets)):
                    x1 = dets[t, 0]
                    y1 = dets[t, 1]
                    x2 = dets[t, 2]
                    y2 = dets[t, 3]
                    cv2.rectangle(img1, (x1, y1), (x2, y2), (0, 255, 0), 4)
                cv2.imwrite('pred.jpg', img1)
                abc = ace
                '''

                detected = []
                for *pred_bbox, conf in dets:
                    obj_pred = 0
                    pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
                    # Compute iou with target boxes
                    iou = bbox_iou(pred_bbox, target_boxes, x1y1x2y2=True)[0]
                    # Extract index of largest overlap
                    best_i = np.argmax(iou)
                    # If overlap exceeds threshold and classification is correct, mark as correct
                    if iou[best_i] > iou_thres and obj_pred == labels[best_i, 0] and best_i not in detected:
                        correct.append(1)
                        detected.append(best_i)
                    else:
                        correct.append(0)

            # Compute Average Precision (AP) per class
            AP, AP_class, R, P = ap_per_class(
                tp=correct,
                conf=dets[:, 4],
                pred_cls=np.zeros_like(dets[:, 4]),  # detections[:, 6]
                target_cls=target_cls)

            # Accumulate AP per class
            AP_accum_count += np.bincount(AP_class, minlength=nC)
            AP_accum += np.bincount(AP_class, minlength=nC, weights=AP)

            # Compute mean AP across all classes in this image, and append to image list
            mAPs.append(AP.mean())
            mR.append(R.mean())
            mP.append(P.mean())

            # Means of all images
            mean_mAP = np.sum(mAPs) / (AP_accum_count + 1E-16)
            mean_R = np.sum(mR) / (AP_accum_count + 1E-16)
            mean_P = np.sum(mP) / (AP_accum_count + 1E-16)

        if batch_i % print_interval == 0:
            # Print image mAP and running mean mAP
            print(('%11s%11s' + '%11.3g' * 4 + 's') %
                  (seen, dataloader.dataset.nF, mean_P, mean_R, mean_mAP,
                   time.time() - t))

    # Print mAP per class
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    print('AP: %-.4f\n\n' % (AP_accum[0] / (AP_accum_count[0] + 1E-16)))

    # Return mAP
    return mean_mAP, mean_R, mean_P
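# test_det() scales normalized ground-truth boxes through xywh2xyxy. A sketch
# of that conversion (center-x, center-y, w, h -> x1, y1, x2, y2), matching
# the common JDE utility (hedged: the repo's version may be numpy-only):
import numpy as np
import torch

def xywh2xyxy(x):
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # x1 = cx - w/2
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # y1 = cy - h/2
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # x2 = cx + w/2
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # y2 = cy + h/2
    return y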
def update(self, im_blob, img0):
    # Process the detection boxes in the current frame
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []    # tracks re-found between the previous frame and this one
    lost_stracks = []      # tracks lost between the previous frame and this one
    removed_stracks = []   # tracks to be removed between the previous frame and this one
    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {'c': c, 's': s,
            'out_height': inp_height // self.opt.down_ratio,
            'out_width': inp_width // self.opt.down_ratio}

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)  # torch.Size([1, 512, 152, 272])

        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
        # dets: boxes as top-left/bottom-right corner coordinates plus score and class;
        # inds: indices into the flattened heatmap.
        # inds holds the 128 highest-confidence positions of the flattened image,
        # i.e. the maximum number of output targets.
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        # id_feature torch.Size([1, 512, 152, 272]), inds torch.Size([1, 128])
        id_feature = id_feature.squeeze(0)  # torch.Size([1, 128, 512])
        id_feature = id_feature.cpu().numpy()

    dets = self.post_process(dets, meta)
    # Maps the feature-map predictions back onto the original image, giving the
    # coordinates and confidences of the 128 detection boxes in image space.
    dets = self.merge_outputs([dets])[1]  # (128, 5)

    remain_inds = dets[:, 4] > self.opt.conf_thres  # keep only boxes whose confidence exceeds the threshold
    dets = dets[remain_inds]              # e.g. (2, 5): only two boxes survive as the final result
    id_feature = id_feature[remain_inds]  # e.g. (2, 512): the corresponding features

    # vis
    '''
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
    cv2.imshow('dets', img0)
    cv2.waitKey(0)
    id0 = id0-1
    '''

    if len(dets) > 0:
        '''Detections'''
        # Create an STrack per detection; these act as tracklets.
        # (Calls the class method directly; anything special about that?)
        detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
                      for (tlbrs, f) in zip(dets[:, :5], id_feature)]
    else:
        detections = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    # Split the tracks that existed before this frame into unconfirmed and tracked_stracks
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding'''
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)  # take the union
    # Predict the current location with KF
    #for strack in strack_pool:
    #    strack.predict()
    STrack.multi_predict(strack_pool)
    # Use the Kalman filter to predict each target's state in the next frame,
    # calling every track's predict method.
    dists = matching.embedding_distance(strack_pool, detections)
    # Match by embedding: returns a cost matrix of distances between each
    # detection and the smooth features of the existing tracks.
    #dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    # For each track, compute its gating distance to every detection in this frame.
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
    # Run Hungarian-style maximum matching on the gated distances, producing the
    # three kinds of results (matches, unmatched tracks, unmatched detections).
    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:  # was in tracked state in the previous frame
            track.update(detections[idet], self.frame_id)
            # Update the track state, including the KF mean vector and covariance matrix
            activated_starcks.append(track)
        else:  # was in 'new' state in the previous frame
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU'''
    # Second pass: try to pair the still-unmatched detections with the still-unmatched tracks
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [strack_pool[i] for i in u_track
                         if strack_pool[i].state == TrackState.Tracked]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)
    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)
    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:  # check whether the track is already lost
            track.mark_lost()
            lost_stracks.append(track)

    '''Third association: deal with unconfirmed tracks, usually tracks with only
    one beginning frame. A track followed for only one frame is an unconfirmed track.'''
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:  # compare against the tracking confidence threshold
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()  # remove tracks that meet the condition
            removed_stracks.append(track)

    # print('Remained match {} s'.format(t4-t3))

    self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
    self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
    # get scores of lost tracks
    output_stracks = [track for track in self.tracked_stracks if track.is_activated]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    return output_stracks
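# Step 2 above ranks track/detection pairs by appearance. A sketch of
# matching.embedding_distance consistent with FairMOT (cosine distance
# between each track's smoothed feature and each detection's current
# feature); hedged, since this repo's matching module may differ:
import numpy as np
from scipy.spatial.distance import cdist

def embedding_distance(tracks, detections, metric='cosine'):
    cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float32)
    if cost_matrix.size == 0:
        return cost_matrix
    det_features = np.asarray([d.curr_feat for d in detections], dtype=np.float32)
    track_features = np.asarray([t.smooth_feat for t in tracks], dtype=np.float32)
    # clamp at zero: cosine distance can go slightly negative from float error
    return np.maximum(0.0, cdist(track_features, det_features, metric))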
def update(self, im_blob, img0):
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []
    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2.0, height / 2.0], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        "c": c,
        "s": s,
        "out_height": inp_height // self.opt.down_ratio,
        "out_width": inp_width // self.opt.down_ratio,
    }

    """ Step 1: Network forward, get detections & embeddings"""
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output["hm"].sigmoid_()
        wh = output["wh"]
        id_feature = output["id"]
        id_feature = F.normalize(id_feature, dim=1)

        reg = output["reg"] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg, cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]

    # vis
    """
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
    cv2.imshow('dets', img0)
    cv2.waitKey(0)
    id0 = id0-1
    """

    if len(dets) > 0:
        """Detections"""
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    """ Add newly detected tracklets to tracked_stracks"""
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    """ Step 2: First association, with embedding"""
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with KF
    # for strack in strack_pool:
    #     strack.predict()
    STrack.multi_predict(strack_pool)
    dists = matching.embedding_distance(strack_pool, detections)
    # dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)

    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    """ Step 3: Second association, with IOU"""
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)
    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)
    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    """Deal with unconfirmed tracks, usually tracks with only one beginning frame"""
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)
    # print('Remained match {} s'.format(t4-t3))

    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)
    # get scores of lost tracks
    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug("===========Frame {}==========".format(self.frame_id))
    logger.debug("Activated: {}".format([track.track_id for track in activated_starcks]))
    logger.debug("Refind: {}".format([track.track_id for track in refind_stracks]))
    logger.debug("Lost: {}".format([track.track_id for track in lost_stracks]))
    logger.debug("Removed: {}".format([track.track_id for track in removed_stracks]))

    return output_stracks
def update(self, im_blob, img0):
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []
    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio
    }

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)

        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    '''
    print("==> [multi-tracker.update] dets:", dets)
    print("==> [multi-tracker.update] dets.size 1:", dets.size())  # [1, 128, 6]
    '''

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    '''
    print("==> [multi-tracker.update] len(dets):", len(dets))        # 128
    print("==> [multi-tracker.update] len(dets[0]):", len(dets[0]))  # 5

    dets:
    [[ 761.85     169.75     779.43     210.57       0.76028]
     [ 746.16     167.86     763.81     209.36       0.70138]
     [ 520.55     170.32     533.13     198.51       0.44955]
     [ 678.15     170.84     687.6      190.35       0.42314]
     [ 706.3      172.26     723        207.56       0.41279]
     [ 731.59     168.2      742.89     194.59       0.40816]
     [ 345.91     188.76     369.22     234.64       0.38459]
     [ 434.66     170.01     448.6      199.26       0.37619]
     [ 212.57     177.95     231.56     228.84       0.26836]
     [ 549.7      168.05     560.64     193.19       0.23459]
     ...]

    print("self.opt.conf_thres:", self.opt.conf_thres)  # 0.4
    '''

    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]

    '''
    print("==> [multi-tracker.update] len(dets):", len(dets))              # 6
    print("==> [multi-tracker.update] len(id_feature):", len(id_feature))  # 6
    print("==> [multi-tracker.update] id_feature[0]:", id_feature.size)    # 3072
    3072 = 6 * 512
    embedding dimension: 512
    '''

    # vis
    '''
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
    cv2.imshow('dets', img0)
    cv2.waitKey(0)
    id0 = id0-1
    '''

    '''
    print("==> [multi-tracker.update] dets[:, :5]:", dets[:, :5])
    print("==> [multi-tracker.update] id_feature:", id_feature)
    print("==> [multi-tracker.update] len(id_feature)", len(id_feature))

    ==> [multi-tracker.update] dets[:, :5]:
    [[ 761.85   169.75   779.43   210.57     0.76028]
     [ 746.16   167.86   763.81   209.36     0.70138]
     [ 520.55   170.32   533.13   198.51     0.44955]
     [ 678.15   170.84   687.6    190.35     0.42314]
     [ 706.3    172.26   723      207.56     0.41279]
     [ 731.59   168.2    742.89   194.59     0.40816]]
    ==> [multi-tracker.update] id_feature:
    [[ 0.047802   0.033811   0.0041801 ...  -0.018475  -0.014819   0.010965]
     [ 0.090996   0.015452   0.020774  ...  -0.017812  -0.013593   0.016779]
     [-0.023971   0.084845   0.10603   ...  -0.063187   0.063411  -0.012202]
     [ 0.050601   0.063119   0.070075  ...  -0.063469   0.0026391  0.051197]
     [ 0.090193   0.036841   0.045577  ...  -0.024319  -0.075271   0.017419]
     [ 0.014926   0.089218   0.07839   ...  -0.09095    0.0066383  0.076563]]
    ==> [multi-tracker.update] len(id_feature) 6
    '''

    if len(dets) > 0:
        '''Detections'''
        # put dets and id_feature into STrack: init new STracks
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding'''
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with KF
    #for strack in strack_pool:
    #    strack.predict()
    STrack.multi_predict(strack_pool)
    dists = matching.embedding_distance(strack_pool, detections)
    #dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)

    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU'''
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)
    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)
    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)
    # print('Remained match {} s'.format(t4-t3))

    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)
    # get scores of lost tracks
    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    # print("==> [multi-tracker.update] len(output_stracks):", len(output_stracks))
    return output_stracks
def update(self, im_blob, img0):
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []
    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio
    }

    ''' Step 1: Network forward, get detections & embeddings
    First run the backbone to get each head's output, then post-process and
    filter by confidence (NMS), adding the new targets to the trajectories.
    '''
    with torch.no_grad():
        output = self.model(im_blob)[-1]  # detector output
        hm = output['hm'].sigmoid_()      # heatmap from the detection network
        wh = output['wh']                 # target width/height from the detection network
        id_feature = output['id']         # Re-ID features from the detection network
        id_feature = F.normalize(id_feature, dim=1)

        reg = output['reg'] if self.opt.reg_offset else None  # target center offsets from the detection network
        # Decoded detections (bbox, score, class, ID) and the sorted valid
        # indices into the feature score map
        dets, inds = mot_decode(hm, wh, reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
        # Gather the valid Re-ID features by index
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        # Squeeze out the singleton dimension
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    # Post-process the detection results
    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    # Filter by confidence threshold to get the valid targets and their Re-ID features
    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]

    # vis
    '''
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
    cv2.imshow('dets', img0)
    cv2.waitKey(0)
    id0 = id0-1
    '''

    if len(dets) > 0:
        '''Detections: wrap each detected target in a tracking object,
        binding the detection results and related attributes'''
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding
    1. Merge [activated_stracks, lost_stracks] into strack_pool.
    2. Compute the appearance cost matrix between detections and strack_pool
       from their feats, i.e. cosine distance on the embeddings.
    3. Use the Kalman filter to predict the new mean/covariance of strack_pool.
    4. Compute the motion cost matrix between strack_pool and detections, and
       set appearance costs beyond the distance threshold to inf.
    5. Match with the Hungarian method (not Munkres here, but another efficient
       optimal assignment method: LAPJV).
       a. Matched:
          strack_pool entries with track_state == tracked: update smooth_feat
          and the Kalman mean/covariance (for the KF), append to activated_stracks.
          strack_pool entries with track_state != tracked: update smooth_feat
          and the Kalman mean/covariance (for the KF), append to refind_stracks.
       b. Unmatched:
          leaves the new detections and r_tracked_stracks.
    '''
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    STrack.multi_predict(strack_pool)  # Kalman prediction
    dists = matching.embedding_distance(strack_pool, detections)
    # cosine distance between the newly detected targets and strack_pool
    #dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    # use the Kalman filter to compute the motion cost between strack_pool and
    # the detections, setting appearance costs beyond the gate to inf (distance constraint)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
    # LAPJV matching // match track boxes with detection boxes // matches holds
    # the index pairs, u_track the unmatched tracker indices, u_detection the
    # unmatched detection indices
    for itracked, idet in matches:
        # matches: 63x2; 63 = number of matched pairs,
        # column 0 = tracked-tracker index, column 1 = detection index
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            # matched tracker and detection: update features and Kalman state
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)  # if it was in lost, re-activate it
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU
    IOU-match the detections and r_tracked_stracks left over from the cosine step.
    1. Compute the IOU cost matrix between detections and r_tracked_stracks.
    2. Run Hungarian matching on the IOU cost (again LAPJV rather than Munkres).
       a. Matched:
          r_tracked_stracks with track_state == tracked: update smooth_feat
          and the Kalman mean/covariance, append to activated_stracks.
          r_tracked_stracks with track_state != tracked: update smooth_feat
          and the Kalman mean/covariance, append to refind_stracks.
       b. Unmatched:
          r_tracked_stracks whose track_state is not lost are set to lost;
          the detections are left over for the next step.
    '''
    detections = [detections[i] for i in u_detection]  # u_detection: detections unmatched in the previous step
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]  # trackers unmatched in the previous step that are still in tracked state
    dists = matching.iou_distance(r_tracked_stracks, detections)  # compute the IOU cost matrix
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)  # LAPJV matching on the IOU cost
    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)
    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)
            # the remaining trackers with no IOU match are set to lost

    ''' Deal with unconfirmed tracks, usually tracks with only one beginning frame
    IOU-match the leftover detections against unconfirmed_stracks.
    1. Compute the IOU cost matrix.
    2. Hungarian matching (LAPJV rather than Munkres).
       a. Matched:
          update unconfirmed_stracks (smooth_feat plus the Kalman
          mean/covariance) and append to activated_stracks.
       b. Unmatched:
          unconfirmed_stracks go straight to removed_stracks;
          unmatched detections are left over for the next step.
    '''
    detections = [detections[i] for i in u_detection]
    # match the cosine/IOU-unmatched detections against the unconfirmed trackers
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        # update unconfirmed_stracks: smooth_feat plus the Kalman mean/covariance,
        # then append to activated_stracks
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)  # unmatched unconfirmed_stracks go straight to removed_stracks

    """ Step 4: Init new stracks
    Initialize the leftover detections as new trackers and append them to activated_stracks.
    """
    for inew in u_detection:
        # detections unmatched by the cosine/IOU/unconfirmed passes are initialized as new trackers
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        # activate the track; activated=True only on the first frame, False otherwise
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            # after disappearing for max_time_lost frames, move to removed_stracks and delete
            track.mark_removed()
            removed_stracks.append(track)
    # print('Remained match {} s'.format(t4-t3))

    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]  # keep only trackers in tracked state
    self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
    # add the new detections into self.tracked_stracks
    self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)  # re-matched trackers
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)
    # get scores of lost tracks
    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    return output_stracks
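# The docstrings above describe LAPJV-based assignment. A sketch of
# matching.linear_assignment built on the `lap` package, as in FairMOT
# (hedged: this repo's matcher may handle ties or empty inputs differently):
import numpy as np
import lap

def linear_assignment(cost_matrix, thresh):
    if cost_matrix.size == 0:
        return (np.empty((0, 2), dtype=int),
                tuple(range(cost_matrix.shape[0])),
                tuple(range(cost_matrix.shape[1])))
    # cost_limit turns entries above thresh into non-matches (index -1)
    _, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
    matches = np.asarray([[ix, mx] for ix, mx in enumerate(x) if mx >= 0])
    unmatched_a = np.where(x < 0)[0]  # rows (tracks) with no assignment
    unmatched_b = np.where(y < 0)[0]  # columns (detections) with no assignment
    return matches, unmatched_a, unmatched_b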
def update_sep(self, im_blob, img0, conf_thres=None):
    self.frame_id += 1
    activated_stracks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []
    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio
    }

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)

        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    dets = self.post_process(dets, meta)
    dets_classes = self.merge_outputs([dets])  # [1]
    # dets = np.concatenate(
    #     [dets[i] for i in range(1, self.opt.num_classes+1)], axis=0)

    output_stracks = []
    id_feature_orig = id_feature.copy()
    start_idx = 0

    for i_class in range(1, 9):  # self.opt.num_classes + 1):
        activated_stracks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        dets = dets_classes[i_class]
        if conf_thres is None:
            remain_inds = dets[:, 4] > self.opt.conf_thres
        else:
            remain_inds = dets[:, 4] > conf_thres[i_class - 1]
        dets = dets[remain_inds]
        # id_feature = id_feature_orig[start_idx:start_idx+len(remain_inds)][remain_inds]
        id_feature = id_feature_orig[start_idx:start_idx + len(dets_classes[i_class])][remain_inds]
        # start_idx = start_idx+len(remain_inds)
        start_idx = start_idx + len(dets_classes[i_class])

        # vis
        '''
        for i in range(0, dets.shape[0]):
            bbox = dets[i][0:4]
            cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
        cv2.imshow('dets', img0)
        cv2.waitKey(0)
        id0 = id0-1
        '''

        if len(dets) > 0:
            '''Detections'''
            detections = [
                STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30, class_id=i_class - 1)
                for (tlbrs, f) in zip(dets[:, :5], id_feature)
            ]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_stracks'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks_sp[i_class]:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)

        ''' Step 2: First association, with embedding'''
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks_sp[i_class])
        # Predict the current location with KF
        #for strack in strack_pool:
        #    strack.predict()
        STrack.multi_predict(strack_pool)
        dists = matching.embedding_distance(strack_pool, detections)
        #dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
        dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_stracks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        ''' Step 3: Second association, with IOU'''
        detections = [detections[i] for i in u_detection]
        r_tracked_stracks = [
            strack_pool[i] for i in u_track
            if strack_pool[i].state == TrackState.Tracked
        ]
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)
        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_stracks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)
        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_stracks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        """ Step 4: Init new stracks"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_stracks.append(track)

        """ Step 5: Update state"""
        for track in self.lost_stracks_sp[i_class]:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)
        # print('Remained match {} s'.format(t4-t3))

        self.tracked_stracks_sp[i_class] = [
            t for t in self.tracked_stracks_sp[i_class] if t.state == TrackState.Tracked
        ]
        self.tracked_stracks_sp[i_class] = joint_stracks(
            self.tracked_stracks_sp[i_class], activated_stracks)
        self.tracked_stracks_sp[i_class] = joint_stracks(
            self.tracked_stracks_sp[i_class], refind_stracks)
        self.lost_stracks_sp[i_class] = sub_stracks(
            self.lost_stracks_sp[i_class], self.tracked_stracks_sp[i_class])
        self.lost_stracks_sp[i_class].extend(lost_stracks)
        self.lost_stracks_sp[i_class] = sub_stracks(
            self.lost_stracks_sp[i_class], self.removed_stracks_sp[i_class])
        self.removed_stracks_sp[i_class].extend(removed_stracks)
        self.tracked_stracks_sp[i_class], self.lost_stracks_sp[i_class] = \
            remove_duplicate_stracks(self.tracked_stracks_sp[i_class],
                                     self.lost_stracks_sp[i_class])

        # get scores of lost tracks
        # output_stracks = [track for track in self.tracked_stracks if track.is_activated]
        output_stracks.extend([
            track for track in self.tracked_stracks_sp[i_class] if track.is_activated
        ])

        # print('iclass {}, nb detected {}, nb output_stracks {}, nb tracked_stracks {}, nb lost_stracks {}, nb removed_stracks {}'\
        #     .format(i_class, len(dets), len(output_stracks), len(self.tracked_stracks_sp[i_class]),
        #             len(self.lost_stracks_sp[i_class]), len(self.removed_stracks_sp[i_class])))
        # logger.debug('===========Frame {}=========='.format(self.frame_id))
        # logger.debug('Activated: {}'.format([track.track_id for track in activated_stracks]))
        # logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
        # logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
        # logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    return output_stracks
meta = {
    'c': c,
    's': s,
    'out_height': inp_height // down_ratio,
    'out_width': inp_width // down_ratio
}

''' Step 1: Network forward, get detections & embeddings'''
with torch.no_grad():
    output = model(im_blob)[-1]
    hm = output['hm'].sigmoid_()
    wh = output['wh']
    id_feature = output['id']
    id_feature = F.normalize(id_feature, dim=1)

    reg = output['reg'] if reg_offset else None
    dets, inds = mot_decode(hm, wh, reg=reg, ltrb=ltrb, K=Kt)
    id_feature = _tranpose_and_gather_feat(id_feature, inds)
    id_feature = id_feature.squeeze(0)
    id_feature = id_feature.cpu().numpy()

dets = post_process(dets, meta)
dets = merge_outputs([dets])[1]

remain_inds = dets[:, 4] > conf_thres
dets = dets[remain_inds]
id_feature = id_feature[remain_inds]

# vis
person_count += len(dets)
for i in range(0, dets.shape[0]):
    bbox = dets[i][0:4]
    cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                  (0, 255, 0), 2)  # color/thickness were truncated in the source; assumed to match the vis blocks in the other snippets
def update(self, im_blob, img0):
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []
    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio
    }

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)

        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]

    # vis
    '''
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
    cv2.imshow('dets', img0)
    cv2.waitKey(0)
    id0 = id0-1
    '''

    if len(dets) > 0:
        '''Detections'''
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding'''
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    dists = matching.embedding_distance(strack_pool, detections)
    # cosine distance between the newly detected targets and the tracked trackers
    STrack.multi_predict(strack_pool)  # Kalman prediction
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
    # use the Kalman filter to compute the distance cost between detections and the pooled trackers
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
    # Hungarian matching // match track boxes with detection boxes //
    # u_track holds the indices of the unmatched trackers
    for itracked, idet in matches:
        # matches: 63x2; 63 = number of matched pairs,
        # column 0 = tracked-tracker index, column 1 = detection index
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            # matched pool tracker and detection: update features and Kalman state
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)  # if it was in lost, re-activate it
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU'''
    """ Re-match the detections and trackers left unmatched by cosine distance, using IOU """
    detections = [detections[i] for i in u_detection]  # u_detection: indices of the unmatched detections
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)
    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            # the list above is already restricted to TrackState.Tracked, so this branch is never reached
            refind_stracks.append(track)
    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)
            # trackers with no IOU match against the tracked trackers are set to lost

    '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
    detections = [detections[i] for i in u_detection]
    # match the cosine/IOU-unmatched detections against the unconfirmed trackers
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    for inew in u_detection:
        # detections unmatched by the cosine/IOU/unconfirmed passes are
        # re-initialized as new unconfirmed trackers
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        # activate the track; activated=True only on the first frame, False otherwise
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:  # after disappearing for max_time_lost (15) frames
            track.mark_removed()
            removed_stracks.append(track)
    # print('Remained match {} s'.format(t4-t3))

    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]  # keep only trackers in tracked state
    self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
    # add the new detections into self.tracked_stracks
    self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)  # re-matched trackers
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)
    # get scores of lost tracks
    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

    return output_stracks
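# A sketch of matching.fuse_motion as used above: gate the appearance cost
# with the Kalman filter's Mahalanobis distance, then blend the two
# (lambda_ = 0.98 as in FairMOT; hedged, this repo may tune the gate or the
# blend differently):
import numpy as np

CHI2INV95_4DOF = 9.4877  # 95% chi-square gate for 4 degrees of freedom

def fuse_motion(kf, cost_matrix, tracks, detections, lambda_=0.98):
    if cost_matrix.size == 0:
        return cost_matrix
    measurements = np.asarray([det.to_xyah() for det in detections])
    for row, track in enumerate(tracks):
        gating_distance = kf.gating_distance(
            track.mean, track.covariance, measurements,
            only_position=False, metric='maha')
        cost_matrix[row, gating_distance > CHI2INV95_4DOF] = np.inf  # gate out implausible pairs
        cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
    return cost_matrix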
def update(self, im_blob, img0):
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio
    }

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        if hasattr(self.model, 'relation'):
            outputs, stuff = self.model(im_blob)
            det_heads = set(['wh', 'hm', 'reg'])
            trk_heads = set(['id'])
            for head in (set(self.model.backend.heads) & det_heads):
                outputs[head] = getattr(self.model.backend, head)(outputs['raw'])
            # for head in (set(self.model.heads) & trk_heads):
            #     outputs[head] = getattr(self.model, head)(outputs['raw_trk'])
            # del outputs['raw_trk']
            del outputs['raw']
            output = outputs
            if hasattr(self.model.relation, 'loss'):
                cur_feats = stuff[-2]
                self.model.relation.lock.acquire()
                self.model.relation.feature_bank.append(
                    cur_feats.detach().cpu())
                self.model.relation.lock.release()
        else:
            output = self.model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)

        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg, ltrb=self.opt.ltrb,
                                K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    remain_inds = dets[:, 4] > self.opt.conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]
    self.inputs_embs.append((dets, id_feature))

    # vis
    '''
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                      (0, 255, 0), 2)
    cv2.imshow('dets', img0)
    cv2.waitKey(0)
    id0 = id0 - 1
    '''

    if len(dets) > 0:
        '''Detections'''
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding'''
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with KF
    #for strack in strack_pool:
    #    strack.predict()
    STrack.multi_predict(strack_pool)
    dists = matching.embedding_distance(strack_pool, detections)
    #dists = matching.iou_distance(strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                 detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.4)

    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU'''
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.5)

    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    for inew in u_detection:
        track = detections[inew]
        if track.score < self.det_thresh:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    # print('Ramained match {} s'.format(t4-t3))
    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)
    # get scores of lost tracks
    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format(
        [track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format(
        [track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format(
        [track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format(
        [track.track_id for track in removed_stracks]))

    return output_stracks
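For the first stage, the cost matrix comes from appearance: a cosine distance between track re-ID features and detection re-ID features. Below is a rough sketch of the computation matching.embedding_distance performs, assuming both feature sets are already L2-normalised (which the F.normalize call above guarantees for detections); the helper name is illustrative.

# Sketch of a cosine-distance cost matrix between track and detection
# embeddings. Assumes rows of both arrays are L2-normalised.
import numpy as np

def embedding_distance_sketch(track_feats, det_feats):
    """track_feats: (T, D), det_feats: (N, D), L2-normalised rows.
    Returns a (T, N) cost matrix; 0 means identical direction."""
    cost = 1.0 - track_feats @ det_feats.T  # 1 - cosine similarity
    return np.maximum(0.0, cost)  # clip tiny negatives from fp rounding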
def update(self, im_blob, img0, p_crops, p_crops_lengths, edge_index,
           gnn_output_layer=-1, p_imgs=None, conf_thres=0.3):
    self.frame_id += 1
    activated_starcks = []
    refind_stracks = []
    lost_stracks = []
    removed_stracks = []

    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = im_blob.shape[2]
    inp_width = im_blob.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // self.opt.down_ratio,
        'out_width': inp_width // self.opt.down_ratio
    }

    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        output = self.model(im_blob, p_crops, p_crops_lengths, edge_index,
                            p_imgs=p_imgs)[gnn_output_layer]
        if type(output) is list:
            output = output[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)

        reg = output['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh,
                                K=self.opt.K)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

        if self.viz_attention and self.frame_id == self.opt.vis_attn_frame:
            # vis attention
            attn = output['p']
            node0_neighbor_idx = output['node0_neighbor_idx']
            keep = torch.where(attn > self.opt.vis_attn_thres)[0]
            self.visualize_centers(im_blob, keep, node0_neighbor_idx, attn,
                                   output, p_imgs)

    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]

    # remain_inds = dets[:, 4] > self.opt.conf_thres
    remain_inds = dets[:, 4] > conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]

    # vis
    '''
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                      (0, 255, 0), 2)
    cv2.imshow('dets', img0)
    cv2.waitKey(0)
    id0 = id0 - 1
    '''

    if len(dets) > 0:
        '''Detections'''
        detections = [
            STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
            for (tlbrs, f) in zip(dets[:, :5], id_feature)
        ]
    else:
        detections = []

    ''' Add newly detected tracklets to tracked_stracks'''
    unconfirmed = []
    tracked_stracks = []  # type: list[STrack]
    for track in self.tracked_stracks:
        if not track.is_activated:
            unconfirmed.append(track)
        else:
            tracked_stracks.append(track)

    ''' Step 2: First association, with embedding'''
    strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
    # Predict the current location with KF
    #for strack in strack_pool:
    #    strack.predict()
    STrack.multi_predict(strack_pool)
    dists = matching.embedding_distance(strack_pool, detections)
    #dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
    dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                 detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.7)

    for itracked, idet in matches:
        track = strack_pool[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(detections[idet], self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    ''' Step 3: Second association, with IOU'''
    detections = [detections[i] for i in u_detection]
    r_tracked_stracks = [
        strack_pool[i] for i in u_track
        if strack_pool[i].state == TrackState.Tracked
    ]
    dists = matching.iou_distance(r_tracked_stracks, detections)
    matches, u_track, u_detection = matching.linear_assignment(dists,
                                                               thresh=0.5)

    for itracked, idet in matches:
        track = r_tracked_stracks[itracked]
        det = detections[idet]
        if track.state == TrackState.Tracked:
            track.update(det, self.frame_id)
            activated_starcks.append(track)
        else:
            track.re_activate(det, self.frame_id, new_id=False)
            refind_stracks.append(track)

    for it in u_track:
        track = r_tracked_stracks[it]
        if not track.state == TrackState.Lost:
            track.mark_lost()
            lost_stracks.append(track)

    '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
    detections = [detections[i] for i in u_detection]
    dists = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.linear_assignment(
        dists, thresh=0.7)
    for itracked, idet in matches:
        unconfirmed[itracked].update(detections[idet], self.frame_id)
        activated_starcks.append(unconfirmed[itracked])
    for it in u_unconfirmed:
        track = unconfirmed[it]
        track.mark_removed()
        removed_stracks.append(track)

    """ Step 4: Init new stracks"""
    for inew in u_detection:
        track = detections[inew]
        # if track.score < self.det_thresh:
        if track.score < conf_thres:
            continue
        track.activate(self.kalman_filter, self.frame_id)
        activated_starcks.append(track)

    """ Step 5: Update state"""
    for track in self.lost_stracks:
        if self.frame_id - track.end_frame > self.max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)

    # print('Ramained match {} s'.format(t4-t3))
    self.tracked_stracks = [
        t for t in self.tracked_stracks if t.state == TrackState.Tracked
    ]
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         activated_starcks)
    self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                         refind_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
    self.lost_stracks.extend(lost_stracks)
    self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
    self.removed_stracks.extend(removed_stracks)
    self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
        self.tracked_stracks, self.lost_stracks)
    # get scores of lost tracks
    output_stracks = [
        track for track in self.tracked_stracks if track.is_activated
    ]

    logger.debug('===========Frame {}=========='.format(self.frame_id))
    logger.debug('Activated: {}'.format(
        [track.track_id for track in activated_starcks]))
    logger.debug('Refind: {}'.format(
        [track.track_id for track in refind_stracks]))
    logger.debug('Lost: {}'.format(
        [track.track_id for track in lost_stracks]))
    logger.debug('Removed: {}'.format(
        [track.track_id for track in removed_stracks]))

    return output_stracks
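The fallback stage scores pairs by spatial overlap instead of appearance. The sketch below shows the 1 − IoU cost that a helper like matching.iou_distance produces for boxes in (x1, y1, x2, y2) form; it is a vectorised numpy illustration with an invented name, not the source implementation.

# Sketch of an IoU-based cost matrix for the second association stage.
import numpy as np

def iou_distance_sketch(atlbrs, btlbrs):
    """atlbrs: (A, 4), btlbrs: (B, 4) boxes as (x1, y1, x2, y2).
    Returns an (A, B) matrix of 1 - IoU."""
    a = np.asarray(atlbrs, dtype=np.float32).reshape(-1, 4)
    b = np.asarray(btlbrs, dtype=np.float32).reshape(-1, 4)
    tl = np.maximum(a[:, None, :2], b[None, :, :2])   # intersection top-left
    br = np.minimum(a[:, None, 2:], b[None, :, 2:])   # intersection bottom-right
    wh = np.clip(br - tl, 0, None)
    inter = wh[..., 0] * wh[..., 1]
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    union = np.clip(area_a[:, None] + area_b[None, :] - inter, 1e-6, None)
    return 1.0 - inter / union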
def forward(self, x, img_path, p_crops, p_crops_lengths, edge_index,
            p_imgs=None):
    """
    Forward function of the GNN detTrack module.

    :param x: input image of (N, 3, im_h, im_w)
    :param img_path: path to the original image, used to recover its size
    :param p_crops: input image crops of the previous frame corresponding to
        each input image, (sum_i n_crops_i, 64)
    :param p_crops_lengths: lengths of the number of previous crops for each
        batch image (N)
    :param edge_index: list of tensors with length (N), each element of which
        has a shape of (2, n_edges_i)
    :param p_imgs: previous-frame images passed to the crop feature extractor
    :return: as written, the roi-aligned detection crops; the unreachable
        tail below builds outputs_dict['classified_edges'] (see inline notes)
    """
    # Get the current image features (N, C, H, W)
    img0 = cv2.imread(img_path)
    width = img0.shape[1]
    height = img0.shape[0]
    inp_height = x.shape[2]
    inp_width = x.shape[3]
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // 4,
        'out_width': inp_width // 4
    }
    y = self.backbone_forward(x)[-1]
    hm = y['hm'].sigmoid_()
    wh = y['wh']
    id_feature = y['id']
    reg = y['reg'] if self.opt.reg_offset else None
    dets, inds = mot_decode(hm, wh, reg=reg, ltrb=self.opt.ltrb, K=self.opt.K)
    dets = self.post_process(dets, meta)
    dets = self.merge_outputs([dets])[1]
    remain_inds = dets[:, 4] > 0.4
    dets = dets[remain_inds]

    boxes = dets[:, 0:4].copy()
    # NOTE: the source unpacked only three values (`_, h, w = x.shape`), which
    # fails on the 4-D batch documented above; fixed to four.
    _, _, h, w = x.shape
    boxes = xywh2xyxy(boxes)
    boxes = boxes * np.array([w, h, w, h])
    # NOTE: the source called roi_align on x.unsqueeze(0), which would make a
    # 4-D batch 5-D; x is passed directly here. The single-tensor boxes list
    # assumes a batch of one image.
    crops = torchvision.ops.roi_align(
        input=x,
        boxes=[torch.from_numpy(boxes).float()],
        output_size=(96, 32))
    # NOTE: this early return is in the source and makes everything below
    # unreachable. It is kept so the function behaves as written; the graph
    # message-passing tail below appears unfinished (see inline notes).
    return crops

    x_edge_index = self.build_edge_index_full(
        len(crops),
        self.default_backbone_feature_resolution[0] *
        self.default_backbone_feature_resolution[1])
    y_x_crops_list = self.crop_features_forward(crops=crops,
                                                crops_lengths=len(crops),
                                                imgs=x)
    y_p_crops_list = self.crop_features_forward(crops=p_crops,
                                                crops_lengths=p_crops_lengths,
                                                imgs=p_imgs)
    # NOTE: inconsistent in the source: edge_index becomes a Python list,
    # which torch.cat cannot consume on the following line.
    edge_index = [edge_index, x_edge_index]
    edge_attr = torch.cat((edge_index, x_edge_index), dim=1)

    # NOTE: y is the dict of backbone head outputs; the code below expects a
    # node-feature tensor here and looks pasted from a graph-solver pipeline.
    xy = y
    x_is_img = len(xy.shape) == 4
    if self.node_cnn is not None and x_is_img:
        xy = self.node_cnn(xy)
        emb_dists = nn.functional.pairwise_distance(
            xy[edge_index[0]], xy[edge_index[1]]).view(-1, 1)
        edge_attr = torch.cat((edge_attr, emb_dists), dim=1)

    # Encoding features step
    latent_edge_feats, latent_node_feats = self.encoder(edge_attr, xy)
    initial_edge_feats = latent_edge_feats
    initial_node_feats = latent_node_feats

    # During training, the feature vectors that the MPNetwork outputs for the
    # last self.num_class_steps message passing steps are classified in order
    # to compute the loss.
    first_class_step = self.num_enc_steps - self.num_class_steps + 1
    outputs_dict = {'classified_edges': []}
    for step in range(1, self.num_enc_steps + 1):
        # Reattach the initially encoded embeddings before the update
        if self.reattach_initial_edges:
            latent_edge_feats = torch.cat(
                (initial_edge_feats, latent_edge_feats), dim=1)
        if self.reattach_initial_nodes:
            latent_node_feats = torch.cat(
                (initial_node_feats, latent_node_feats), dim=1)

        # Message Passing Step
        latent_node_feats, latent_edge_feats = self.MPNet(
            latent_node_feats, edge_index, latent_edge_feats)

        if step >= first_class_step:
            # Classification Step
            dec_edge_feats, _ = self.classifier(latent_edge_feats)
            outputs_dict['classified_edges'].append(dec_edge_feats)

    if self.num_enc_steps == 0:
        dec_edge_feats, _ = self.classifier(latent_edge_feats)
        outputs_dict['classified_edges'].append(dec_edge_feats)

    return outputs_dict
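The roi_align call above crops one fixed-size patch per detection out of the input tensor. Below is a minimal, self-contained example of the boxes format it expects: a list with one (K, 4) tensor of (x1, y1, x2, y2) pixel coordinates per batch image. Sizes and coordinate values here are arbitrary placeholders.

# Standalone example of torchvision.ops.roi_align with list-style boxes.
import torch
import torchvision

x = torch.randn(1, 3, 608, 1088)                  # one input image tensor
boxes = [torch.tensor([[100., 80., 164., 272.],   # two detections, x1 y1 x2 y2
                       [400., 60., 470., 300.]])]
crops = torchvision.ops.roi_align(input=x, boxes=boxes, output_size=(96, 32))
print(crops.shape)  # torch.Size([2, 3, 96, 32]) -- one crop per box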