def forward(self, in_x, rois, in_ref_x=None, ref_rois=None):
    # x: [N_0, C], ref_x: [N_1, C]
    # rois: [N_0, 4], ref_rois: [N_1, 4]
    if in_ref_x is None:
        in_ref_x = in_x
        ref_rois = rois
    N_0, C = in_x.shape
    N_1, C_1 = in_ref_x.shape
    assert C == C_1
    x = in_x.view(N_0, C, 1, 1)
    # Note: reshape with N_1, not N_0, since the reference set may have a
    # different number of boxes
    ref_x = in_ref_x.view(N_1, C, 1, 1)
    for i, embed_conv in enumerate(self.embed_convs):
        x = embed_conv(x)
        if not self.share_embed_convs:
            ref_x = self.ref_embed_convs[i](ref_x)
        else:
            ref_x = embed_conv(ref_x)
    # [N, G, C // G]
    x = x.view(N_0, self.groups, -1)
    ref_x = ref_x.view(N_1, self.groups, -1)
    # [G, N_0, C // G]
    x = x.permute(1, 0, 2)
    # [G, C // G, N_1]
    ref_x = ref_x.permute(1, 2, 0)
    # [G, N_0, N_1], scaled dot-product attention logits
    matrix = torch.matmul(x, ref_x)
    matrix /= x.shape[-1]**0.5
    # [N_0, G, N_1]
    matrix = matrix.permute(1, 0, 2)
    if self.with_loc:
        # Location prior: bias the logits by the log-IoU of the boxes
        # [N_0, N_1]
        ious = bbox_overlaps(rois[:, 1:], ref_rois[:, 1:])
        ious = ious.view(N_0, 1, N_1).expand(N_0, self.groups, N_1)
        matrix += torch.log(ious + 1e-6)
    # [N_0, G, N_1]
    matrix = matrix.softmax(dim=2)
    # [N_0 * G, N_1]
    matrix = matrix.reshape(-1, N_1)
    # [N_0 * G, C] = [N_0 * G, N_1] * [N_1, C]
    y = torch.matmul(matrix, in_ref_x)
    # [N_0, C * G]
    y = y.view(N_0, -1, 1, 1)
    # [N_0, C]
    y = self.conv_out(y).view(N_0, -1)
    return y
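# --- Illustrative sketch (not part of the module) ---
# The core of `forward` above is grouped dot-product attention: features are
# split into G groups, each group attends over the reference features with a
# scaled softmax (optionally biased by log-IoU of the boxes). A minimal
# standalone version with random tensors; all names and sizes here are
# hypothetical, and the real module additionally reduces the [N0, G * C]
# output back to [N0, C] with `conv_out`:
import torch

def grouped_attention_sketch(x, ref_x, groups=2):
    # x: [N0, C], ref_x: [N1, C]; C must be divisible by groups
    N0, C = x.shape
    N1 = ref_x.shape[0]
    q = x.view(N0, groups, -1).permute(1, 0, 2)      # [G, N0, C // G]
    k = ref_x.view(N1, groups, -1).permute(1, 2, 0)  # [G, C // G, N1]
    attn = torch.matmul(q, k) / q.shape[-1]**0.5     # [G, N0, N1]
    attn = attn.permute(1, 0, 2).softmax(dim=2)      # [N0, G, N1]
    # Each group aggregates the full reference features, as in `forward`
    y = torch.matmul(attn.reshape(-1, N1), ref_x)    # [N0 * G, C]
    return y.view(N0, -1)                            # [N0, G * C]

y = grouped_attention_sketch(torch.randn(3, 8), torch.randn(5, 8), groups=2)
assert y.shape == (3, 16)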
def iou_loss(pred, target, eps=1e-6):
    """IoU loss.

    Computes the IoU loss between a set of predicted bboxes and target
    bboxes. The loss is calculated as the negative log of IoU.

    Args:
        pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2),
            shape (n, 4).
        target (Tensor): Corresponding gt bboxes, shape (n, 4).
        eps (float): Epsilon to avoid log(0).

    Returns:
        Tensor: Loss tensor.
    """
    ious = bbox_overlaps(pred, target, is_aligned=True).clamp(min=eps)
    loss = -ious.log()
    return loss
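# --- Illustrative sketch (not part of the module) ---
# `iou_loss` relies on `bbox_overlaps(..., is_aligned=True)`; below is a
# minimal stand-in for aligned IoU plus a toy call, showing the expected
# behavior: a perfect match gives loss -log(1) = 0, lower IoU a larger loss.
import torch

def aligned_iou_sketch(a, b, eps=1e-6):
    # a, b: [n, 4] boxes as (x1, y1, x2, y2), matched row by row
    lt = torch.max(a[:, :2], b[:, :2])
    rb = torch.min(a[:, 2:], b[:, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, 0] * wh[:, 1]
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    return inter / (area_a + area_b - inter + eps)

pred = torch.tensor([[0., 0., 2., 2.], [0., 0., 2., 2.]])
target = torch.tensor([[0., 0., 2., 2.], [1., 1., 3., 3.]])
loss = -aligned_iou_sketch(pred, target).clamp(min=1e-6).log()
# First pair: IoU = 1 -> loss 0; second pair: IoU = 1/7 -> loss ~ 1.95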
def match(self,
          bboxes: torch.Tensor,
          labels: torch.Tensor,
          boxes_3d: torch.Tensor,
          depth_uncertainty: torch.Tensor,
          position: torch.Tensor,
          rotation: torch.Tensor,
          embeds: torch.Tensor,
          cur_frame: int,
          pure_det: bool = False):
    """Match incoming detection results with embedding and 3D infos.

    Args:
        bboxes (torch.Tensor): (N, 5), [x1, y1, x2, y2, conf].
        labels (torch.Tensor): (N,).
        boxes_3d (torch.Tensor): (N, 7), 3D information stored in world
            coordinates with the format [X, Y, Z, theta, h, w, l].
        depth_uncertainty (torch.Tensor): (N,), confidence in depth
            estimation.
        position (torch.Tensor): (3,), camera position.
        rotation (torch.Tensor): (3, 3), camera rotation.
        embeds (torch.Tensor): (N, C), extracted box features.
        cur_frame (int): Index of the current frame.
        pure_det (bool): Output pure detections. Defaults to False.

    Raises:
        NotImplementedError: Raised if self.match_metric is not supported.

    Returns:
        tuple: Matched bboxes, labels, boxes_3d, ids, sorting indices and
            validity mask.
    """
    if depth_uncertainty is None or not self.with_depth_uncertainty:
        depth_uncertainty = boxes_3d.new_ones((boxes_3d.shape[0], 1))

    # Sort detections by 2D confidence weighted by depth confidence
    _, inds = (bboxes[:, -1] *
               depth_uncertainty.flatten()).sort(descending=True)
    bboxes = bboxes[inds, :]
    labels = labels[inds]
    embeds = embeds[inds, :]
    boxes_3d = boxes_3d[inds]
    depth_uncertainty = depth_uncertainty[inds]

    if pure_det:
        valids = bboxes.new_ones((bboxes.size(0)), dtype=torch.bool)
        ids = torch.arange(
            self.num_tracklets,
            self.num_tracklets + bboxes.size(0),
            dtype=torch.long)
        self.num_tracklets += bboxes.size(0)
        return bboxes, labels, boxes_3d, ids, inds, valids

    # Duplicate removal for potential backdrops and cross classes
    valids = bboxes.new_ones((bboxes.size(0)))
    ious = bbox_overlaps(bboxes[:, :-1], bboxes[:, :-1])
    for i in range(1, bboxes.size(0)):
        thr = self.nms_backdrop_iou_thr if bboxes[
            i, -1] < self.obj_score_thr else self.nms_class_iou_thr
        if (ious[i, :i] > thr).any():
            valids[i] = 0
    valids = valids == 1
    bboxes = bboxes[valids, :]
    labels = labels[valids]
    embeds = embeds[valids, :]
    boxes_3d = boxes_3d[valids]
    depth_uncertainty = depth_uncertainty[valids]

    # Init ids container
    ids = torch.full((bboxes.size(0), ), -1, dtype=torch.long)

    # Match if buffer is not empty
    if bboxes.size(0) > 0 and not self.empty:
        memo_bboxes, memo_labels, memo_boxes_3d, \
            memo_trackers, memo_embeds, memo_ids, memo_vs = self.memo

        # Motion-based prediction of each tracklet into the current frame
        mmcv.check_accum_time('predict', counting=True)
        memo_boxes_3d_predict = memo_boxes_3d.detach().clone()
        for ind, memo_tracker in enumerate(memo_trackers):
            memo_velo = memo_tracker.predict(
                update_state=memo_tracker.age != 0)
            memo_boxes_3d_predict[ind, :3] += memo_boxes_3d.new_tensor(
                memo_velo[7:])
        mmcv.check_accum_time('predict', counting=False)

        if self.with_bbox_iou:

            def get_xy_box(boxes_3d_world):
                box_x_cen = boxes_3d_world[:, 0]
                box_y_cen = boxes_3d_world[:, 1]
                box_width = boxes_3d_world[:, 5]
                box_length = boxes_3d_world[:, 6]

                dets_xy_box = torch.stack([
                    box_x_cen - box_width / 2.0,
                    box_y_cen - box_length / 2.0,
                    box_x_cen + box_width / 2.0,
                    box_y_cen + box_length / 2.0
                ], dim=1)
                return dets_xy_box

            if self.track_bbox_iou == 'box2d':
                scores_iou = bbox_overlaps(bboxes[:, :-1],
                                           memo_bboxes[:, :-1])
            elif self.track_bbox_iou == 'bev':
                dets_xy_box = get_xy_box(boxes_3d)
                memo_dets_xy_box = get_xy_box(memo_boxes_3d_predict)
                scores_iou = bbox_overlaps(dets_xy_box, memo_dets_xy_box)
            elif self.track_bbox_iou == 'box3d':
                depth_weight = F.pairwise_distance(
                    boxes_3d[..., None],
                    memo_boxes_3d_predict[..., None].transpose(2, 0))
                scores_iou = torch.exp(-depth_weight / 10.0)
            elif self.track_bbox_iou == 'box2d_depth_aware':
                depth_weight = F.pairwise_distance(
                    boxes_3d[..., None],
                    memo_boxes_3d_predict[..., None].transpose(2, 0))
                scores_iou = torch.exp(-depth_weight / 10.0)
                scores_iou *= bbox_overlaps(bboxes[:, :-1],
                                            memo_bboxes[:, :-1])
            else:
                raise NotImplementedError
        else:
            scores_iou = bboxes.new_ones(
                [bboxes.size(0), memo_bboxes.size(0)])

        if self.with_deep_feat:

            def compute_quasi_dense_feat_match(embeds, memo_embeds):
                if self.match_metric == 'cycle_softmax':
                    feats = torch.mm(embeds, memo_embeds.t())
                    d2t_scores = feats.softmax(dim=1)
                    t2d_scores = feats.softmax(dim=0)
                    scores_feat = (d2t_scores + t2d_scores) / 2
                elif self.match_metric == 'softmax':
                    feats = torch.mm(embeds, memo_embeds.t())
                    scores_feat = feats.softmax(dim=1)
                elif self.match_metric == 'cosine':
                    scores_feat = torch.mm(
                        F.normalize(embeds, p=2, dim=1),
                        F.normalize(memo_embeds, p=2, dim=1).t())
                else:
                    raise NotImplementedError
                return scores_feat

            scores_feat = compute_quasi_dense_feat_match(
                embeds, memo_embeds)
        else:
            scores_feat = scores_iou.new_ones(scores_iou.shape)

        # Match with depth ordering
        if self.with_depth_ordering:

            def compute_boxoverlap_with_depth(obsv_boxes_3d, memo_boxes_3d,
                                              memo_vs):
                # Sum up all the available region of each tracker
                if self.depth_match_metric == 'centroid':
                    depth_weight = F.pairwise_distance(
                        obsv_boxes_3d[..., :3, None],
                        memo_boxes_3d[..., :3, None].transpose(2, 0))
                    depth_weight = torch.exp(-depth_weight / 10.0)
                elif self.depth_match_metric == 'cosine':
                    match_corners_observe = tu.worldtocamera_torch(
                        obsv_boxes_3d[:, :3], position, rotation)
                    match_corners_predict = tu.worldtocamera_torch(
                        memo_boxes_3d[:, :3], position, rotation)
                    depth_weight = F.cosine_similarity(
                        match_corners_observe[..., None],
                        match_corners_predict[..., None].transpose(2, 0))
                    depth_weight += 1.0
                    depth_weight /= 2.0
                elif self.depth_match_metric == 'pure_motion':
                    # Moving distance should be aligned:
                    # V_observed-tracked vs. V_velocity
                    depth_weight = F.pairwise_distance(
                        obsv_boxes_3d[..., :3, None] -
                        memo_boxes_3d[..., :3, None].transpose(2, 0),
                        memo_vs[..., :3, None].transpose(2, 0))
                    depth_weight = torch.exp(-depth_weight / 5.0)
                    # Moving direction should be aligned; falls to 0.5 when
                    # the two vectors are not within +-90 degrees
                    cos_sim = F.cosine_similarity(
                        obsv_boxes_3d[..., :2, None] -
                        memo_boxes_3d[..., :2, None].transpose(2, 0),
                        memo_vs[..., :2, None].transpose(2, 0))
                    cos_sim += 1.0
                    cos_sim /= 2.0
                    depth_weight *= cos_sim
                elif self.depth_match_metric == 'motion':
                    centroid_weight = F.pairwise_distance(
                        obsv_boxes_3d[..., :3, None],
                        memo_boxes_3d_predict[..., :3, None].transpose(2, 0))
                    centroid_weight = torch.exp(-centroid_weight / 10.0)
                    # Moving distance should be aligned:
                    # V_observed-tracked vs. V_velocity
                    motion_weight = F.pairwise_distance(
                        obsv_boxes_3d[..., :3, None] -
                        memo_boxes_3d[..., :3, None].transpose(2, 0),
                        memo_vs[..., :3, None].transpose(2, 0))
                    motion_weight = torch.exp(-motion_weight / 5.0)
                    # Moving direction should be aligned; falls to 0.5 when
                    # the two vectors are not within +-90 degrees
                    cos_sim = F.cosine_similarity(
                        obsv_boxes_3d[..., :2, None] -
                        memo_boxes_3d[..., :2, None].transpose(2, 0),
                        memo_vs[..., :2, None].transpose(2, 0))
                    cos_sim += 1.0
                    cos_sim /= 2.0
                    depth_weight = cos_sim * centroid_weight + (
                        1.0 - cos_sim) * motion_weight
                else:
                    raise NotImplementedError
                return depth_weight

            if self.depth_match_metric == 'motion':
                scores_depth = compute_boxoverlap_with_depth(
                    boxes_3d, memo_boxes_3d, memo_vs)
            else:
                scores_depth = compute_boxoverlap_with_depth(
                    boxes_3d, memo_boxes_3d_predict, memo_vs)
        else:
            scores_depth = scores_iou.new_ones(scores_iou.shape)

        if self.with_cats:
            cat_same = labels.view(-1, 1) == memo_labels.view(1, -1)
            scores_cats = cat_same.float()
        else:
            scores_cats = scores_iou.new_ones(scores_iou.shape)

        # Fuse box, depth and appearance affinities; gate by category and
        # positive overlap
        scores = self.bbox_affinity_weight * scores_iou * scores_depth + \
            self.feat_affinity_weight * scores_feat
        scores /= (self.bbox_affinity_weight + self.feat_affinity_weight)
        scores *= (scores_iou > 0.0).float()
        scores *= (scores_depth > 0.0).float()
        scores *= scores_cats

        # Assign matching
        mmcv.check_accum_time('matching', counting=True)
        if self.match_algo == 'greedy':
            for i in range(bboxes.size(0)):
                conf, memo_ind = torch.max(scores[i, :], dim=0)
                tid = memo_ids[memo_ind]
                # Matching confidence
                if conf > self.match_score_thr:
                    # Update existing tracklet
                    if tid > -1:
                        # Keep object with high 3D objectness
                        if bboxes[i, -1] * depth_uncertainty[
                                i] > self.obj_score_thr:
                            ids[i] = tid
                            scores[:i, memo_ind] = 0
                            scores[i + 1:, memo_ind] = 0
                        else:
                            # Reduce FP w/ low objectness but high match conf
                            if conf > self.nms_conf_thr:
                                ids[i] = -2
        elif self.match_algo == 'hungarian':
            # Hungarian
            matched_indices = linear_assignment(-scores.cpu().numpy())
            for idx in range(len(matched_indices[0])):
                i = matched_indices[0][idx]
                memo_ind = matched_indices[1][idx]
                conf = scores[i, memo_ind]
                tid = memo_ids[memo_ind]
                if conf > self.match_score_thr and tid > -1:
                    # Keep object with high 3D objectness
                    if bboxes[i, -1] * depth_uncertainty[
                            i] > self.obj_score_thr:
                        ids[i] = tid
                        scores[:i, memo_ind] = 0
                        scores[i + 1:, memo_ind] = 0
                    else:
                        # Reduce FP w/ low objectness but high match conf
                        if conf > self.nms_conf_thr:
                            ids[i] = -2
            del matched_indices
        mmcv.check_accum_time('matching', counting=False)

        if self._debug:
            bbox_inds = scores.max(1).indices
            print("\nTotal:\n"
                  f"Scores: {scores.max(1).values.cpu().numpy()}\n"
                  f"Indices: {scores.max(1).indices.cpu().numpy()}\n"
                  "IoU:\n"
                  f"Scores: {scores_iou.max(1).values.cpu().numpy()}\n"
                  f"Indices: {scores_iou.max(1).indices.cpu().numpy()}\n"
                  "Feat:\n"
                  f"Scores: {scores_feat.max(1).values.cpu().numpy()}\n"
                  f"Indices: {scores_feat.max(1).indices.cpu().numpy()}\n"
                  "Depth:\n"
                  f"Scores: {scores_depth.max(1).values.cpu().numpy()}\n"
                  f"Indices: {scores_depth.max(1).indices.cpu().numpy()}")
            print("Uncertainty:\n",
                  depth_uncertainty.flatten().cpu().numpy())
            print(f"Memo: {memo_boxes_3d.shape}\tMemo Ids: {memo_ids}\n"
                  f"{memo_boxes_3d[bbox_inds].cpu().numpy()}")
            print(f"Velo:\n{memo_vs[bbox_inds].cpu().numpy()}")
            print(f"Pred:\n{memo_boxes_3d_predict[bbox_inds].cpu().numpy()}")
            print(f"Obsv: {boxes_3d.shape}\tObsv Ids: {ids}\n"
                  f"{boxes_3d.cpu().numpy()}")

    # Initialize new tracklets for confident unmatched detections
    new_inds = (ids == -1) & (bboxes[:, 4] > self.init_score_thr).cpu()
    num_news = new_inds.sum()
    ids[new_inds] = torch.arange(
        self.num_tracklets,
        self.num_tracklets + num_news,
        dtype=torch.long)
    self.num_tracklets += num_news

    self.update_memo(ids, bboxes, boxes_3d, depth_uncertainty, embeds,
                     labels, cur_frame)

    update_bboxes = bboxes.detach().clone()
    update_labels = labels.detach().clone()
    update_boxes_3d = boxes_3d.detach().clone()
    for tid in ids[ids > -1]:
        update_boxes_3d[ids == tid] = self.tracklets[int(tid)]['box_3d']
    update_ids = ids.detach().clone()

    if self._debug:
        print(f"Updt: {update_boxes_3d.shape}\t"
              f"Updt ID: {update_ids.cpu().numpy()}\n"
              f"{update_boxes_3d.cpu().numpy()}")

    return update_bboxes, update_labels, update_boxes_3d, update_ids, \
        inds, valids
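# --- Illustrative sketch (not part of the tracker) ---
# `match` fuses three affinity matrices (2D/3D IoU, depth/motion, appearance)
# into scores = (w_bbox * iou * depth + w_feat * feat) / (w_bbox + w_feat),
# then assigns greedily: each detection takes its best remaining tracklet if
# the fused score clears `match_score_thr`. A toy standalone version of that
# fusion and greedy step (weights and threshold below are hypothetical):
import torch

def greedy_match_sketch(scores_iou, scores_depth, scores_feat,
                        w_bbox=0.5, w_feat=0.5, match_score_thr=0.5):
    scores = (w_bbox * scores_iou * scores_depth +
              w_feat * scores_feat) / (w_bbox + w_feat)
    ids = torch.full((scores.size(0), ), -1, dtype=torch.long)
    for i in range(scores.size(0)):
        conf, j = scores[i].max(dim=0)
        if conf > match_score_thr:
            ids[i] = j
            scores[:, j] = 0  # claimed tracklets cannot be matched again
    return ids

ids = greedy_match_sketch(
    torch.tensor([[0.9, 0.1], [0.8, 0.2]]),
    torch.ones(2, 2),
    torch.tensor([[0.9, 0.1], [0.7, 0.3]]))
# Both detections prefer tracklet 0; the first claims it, so ids == [0, -1]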
def update_memo(self, ids, bboxes, boxes_3d, depth_uncertainty, embeds,
                labels, cur_frame):
    tracklet_inds = ids > -1

    # Update memo
    for tid, bbox, box_3d, d_uncertainty, embed, label in zip(
            ids[tracklet_inds], bboxes[tracklet_inds],
            boxes_3d[tracklet_inds], depth_uncertainty[tracklet_inds],
            embeds[tracklet_inds], labels[tracklet_inds]):
        tid = int(tid)
        if tid in self.tracklets.keys():
            self.tracklets[tid]['bbox'] = bbox
            mmcv.check_accum_time('update', counting=True)
            self.tracklets[tid]['tracker'].update(
                box_3d.cpu().numpy(), d_uncertainty.cpu().numpy())
            mmcv.check_accum_time('update', counting=False)

            tracker_box = self.tracklets[tid]['tracker'].get_state()[:7]
            pd_box_3d = box_3d.new_tensor(tracker_box)

            velocity = (pd_box_3d - self.tracklets[tid]['box_3d']) / (
                cur_frame - self.tracklets[tid]['last_frame'])

            self.tracklets[tid]['box_3d'] = pd_box_3d
            # Exponential moving average of the appearance embedding
            self.tracklets[tid]['embed'] += self.memo_momentum * (
                embed - self.tracklets[tid]['embed'])
            self.tracklets[tid]['label'] = label
            # Running average of the velocity over accumulated frames
            self.tracklets[tid]['velocity'] = (
                self.tracklets[tid]['velocity'] *
                self.tracklets[tid]['acc_frame'] + velocity) / (
                    self.tracklets[tid]['acc_frame'] + 1)
            self.tracklets[tid]['last_frame'] = cur_frame
            self.tracklets[tid]['acc_frame'] += 1
        else:
            if self.tracker_model_name == 'LSTM3DTracker':
                built_tracker = self.tracker_model(
                    self.device, self.lstm, box_3d.cpu().numpy(),
                    d_uncertainty.cpu().numpy())
            else:
                built_tracker = self.tracker_model(
                    box_3d.cpu().numpy(), d_uncertainty.cpu().numpy())
            self.tracklets[tid] = dict(
                bbox=bbox,
                box_3d=box_3d,
                tracker=built_tracker,
                embed=embed,
                label=label,
                last_frame=cur_frame,
                velocity=torch.zeros_like(box_3d),
                acc_frame=0)

    # Handle vanished tracklets
    for tid in self.tracklets:
        if cur_frame > self.tracklets[tid]['last_frame'] and tid > -1:
            self.tracklets[tid]['box_3d'][:self.loc_dim] = self.tracklets[
                tid]['box_3d'].new_tensor(
                    self.tracklets[tid]['tracker'].predict()[:self.loc_dim])

    # Add backdrops
    backdrop_inds = torch.nonzero(ids == -1).squeeze(1)
    ious = bbox_overlaps(bboxes[backdrop_inds, :-1], bboxes[:, :-1])
    for i, ind in enumerate(backdrop_inds):
        if (ious[i, :ind] > self.nms_backdrop_iou_thr).any():
            backdrop_inds[i] = -1
    backdrop_inds = backdrop_inds[backdrop_inds > -1]

    backdrop_tracker = [
        self.tracker_model(self.device, self.lstm,
                           boxes_3d[bd_ind].cpu().numpy(),
                           depth_uncertainty[bd_ind].cpu().numpy())
        if self.tracker_model_name == 'LSTM3DTracker' else
        self.tracker_model(boxes_3d[bd_ind].cpu().numpy(),
                           depth_uncertainty[bd_ind].cpu().numpy())
        for bd_ind in backdrop_inds
    ]

    self.backdrops.insert(
        0,
        dict(
            bboxes=bboxes[backdrop_inds],
            boxes_3d=boxes_3d[backdrop_inds],
            tracker=backdrop_tracker,
            embeds=embeds[backdrop_inds],
            labels=labels[backdrop_inds]))

    # Pop memo
    invalid_ids = []
    for k, v in self.tracklets.items():
        if cur_frame - v['last_frame'] >= self.memo_tracklet_frames:
            invalid_ids.append(k)
    for invalid_id in invalid_ids:
        self.tracklets.pop(invalid_id)

    if len(self.backdrops) > self.memo_backdrop_frames:
        self.backdrops.pop()
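# --- Illustrative sketch (not part of the tracker) ---
# Two running statistics are maintained per tracklet in `update_memo`: an
# exponential moving average of the appearance embedding (rate
# `memo_momentum`) and an arithmetic running average of the velocity over
# `acc_frame` previous updates. Standalone versions with toy values:
import torch

def ema_update_sketch(old_embed, new_embed, momentum=0.8):
    # embed <- embed + m * (new - embed), i.e. (1 - m) * embed + m * new
    return old_embed + momentum * (new_embed - old_embed)

def running_avg_velocity_sketch(old_velocity, new_velocity, acc_frame):
    # Mean of all velocities observed so far, acc_frame of them before now
    return (old_velocity * acc_frame + new_velocity) / (acc_frame + 1)

e = ema_update_sketch(torch.zeros(4), torch.ones(4))                  # 0.8s
v = running_avg_velocity_sketch(torch.ones(3), 3 * torch.ones(3), 1)  # 2.0s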
def cal_loss_embed(self, asso_probs, cos_probs, ids, id_weights,
                   ref_sampling_results, cfg):
    losses = dict()
    batch_size = len(ids)
    loss_asso = 0.
    loss_iou = 0.
    nelements = 0.

    # Calculate per-image loss
    for prob, cos_prob, cur_ids, cur_weights, res in zip(
            asso_probs, cos_probs, ids, id_weights, ref_sampling_results):
        valid_idx = torch.nonzero(cur_weights).squeeze()
        if len(valid_idx.size()) == 0:
            continue

        # Build the pid (paired id) vector: gt boxes first, then positive
        # proposals mapped to their best-overlapping gt, then optionally
        # negatives marked with -2
        num_gt = 0
        num_pos = 0
        num_neg = 0
        pids = []
        for _res in res:
            ious = bbox_overlaps(_res.pos_bboxes, _res.gt_bboxes)
            pids.append(ious.max(dim=1)[1] + num_gt)
            num_gt += _res.gt_bboxes.size(0)
            num_pos += _res.pos_bboxes.size(0)
            if cfg.with_ref_neg:
                num_neg += _res.neg_bboxes.size(0)
        assert num_gt + num_pos + num_neg == prob.size(1)
        pids.insert(0, torch.arange(num_gt).long().to(prob.device))
        if cfg.with_ref_neg:
            pids.append((torch.ones(num_neg).long() * -2).to(prob.device))
        pids = torch.cat(pids, dim=0)

        # Multi-positive association loss: log(1 + sum_pos e^-s * sum_neg e^s)
        pos_inds = (cur_ids.view(-1, 1) == pids.view(1, -1)).float()
        neg_inds = (cur_ids.view(-1, 1) != pids.view(1, -1)).float()
        exp_pos = (torch.exp(-1 * prob) * pos_inds).sum(dim=1)
        exp_neg = (torch.exp(prob.clamp(max=80)) * neg_inds).sum(dim=1)
        loss = torch.log(1 + exp_pos * exp_neg)
        loss_asso += (loss * cur_weights).sum() / cur_weights.sum()

        if self.loss_iou is not None:
            dists = torch.abs(cos_prob - pos_inds)**2
            # Positive pairs
            pos_points = torch.nonzero(pos_inds == 1)
            pos_dists = dists[pos_points[:, 0], pos_points[:, 1]]
            nelements += pos_dists.nelement()
            loss_iou += pos_dists.sum()
            # Negative pairs
            neg_inds = torch.nonzero(pos_inds == 0)
            if self.loss_iou['sample_ratio'] > -1:
                num_negs = pos_dists.nelement() * \
                    self.loss_iou['sample_ratio']
                if len(neg_inds) < num_negs:
                    num_negs = len(neg_inds)
            else:
                num_negs = len(neg_inds)
            nelements += num_negs
            if self.loss_iou['hard_mining']:
                _loss_neg = dists[neg_inds[:, 0],
                                  neg_inds[:, 1]].topk(num_negs)[0]
            else:
                neg_inds = self.random_choice(neg_inds, num_negs)
                _loss_neg = dists[neg_inds[:, 0], neg_inds[:, 1]]
            if self.loss_iou['margin'] > 0:
                _loss_neg *= (_loss_neg > self.loss_iou['margin']).float()
            loss_iou += _loss_neg.sum()

    # Average over the batch
    losses['loss_asso'] = loss_asso / batch_size * \
        self.loss_asso['loss_weight']
    if self.loss_iou is not None:
        losses['loss_iou'] = (
            loss_iou / (nelements + 1e-6)) * self.loss_iou['loss_weight']
    return losses
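# --- Illustrative sketch (not part of the loss) ---
# The association term above is a multi-positive contrastive loss:
#   loss = log(1 + sum_pos exp(-s) * sum_neg exp(s))
# over similarity logits s, which reduces to the familiar
# log(1 + exp(s_n - s_p)) when there is one positive and one negative.
# A toy standalone evaluation:
import torch

def multi_pos_loss_sketch(prob, pos_inds):
    # prob: [n, m] similarity logits; pos_inds: [n, m] 0/1 positive mask
    neg_inds = 1.0 - pos_inds
    exp_pos = (torch.exp(-prob) * pos_inds).sum(dim=1)
    exp_neg = (torch.exp(prob.clamp(max=80)) * neg_inds).sum(dim=1)
    return torch.log(1 + exp_pos * exp_neg)

prob = torch.tensor([[5.0, -5.0]])        # confident: positive >> negative
mask = torch.tensor([[1.0, 0.0]])
loss = multi_pos_loss_sketch(prob, mask)  # ~ log(1 + e^-10) ~ 0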
def analyze(self,
            img_meta,
            bboxes,
            labels,
            ids,
            depths=None,
            dims=None,
            alphas=None,
            cen_2ds=None,
            show=False,
            save=False,
            gt_cats=None):
    gt_bboxes, gt_labels, gt_ids, gt_ignores, \
        gt_alphas, gt_rotys, gt_dims, gt_trans, gt_2dcs = self.loadGts(
            img_meta, gt_cats)

    track_inds = ids > -1
    track_bboxes = bboxes[track_inds]
    track_labels = labels[track_inds]
    if depths is not None:
        track_depths = depths[track_inds]
    else:
        track_depths = None
    if dims is not None:
        track_dims = dims[track_inds]
    else:
        track_dims = None
    if alphas is not None:
        track_alphas = alphas[track_inds]
    else:
        track_alphas = None
    if cen_2ds is not None:
        track_2dcs = cen_2ds[track_inds]
    else:
        track_2dcs = None
    track_ids = ids[track_inds]

    if len(gt_ignores) > 0:
        ignore_inds = (bbox_overlaps(
            bboxes[:, :4], gt_ignores, mode='iof') > 0.5).any(dim=1)

    if track_bboxes.size(0) == 0:
        self.counter.num_fn += gt_bboxes.size(0)
        return
    if gt_bboxes.size(0) == 0:
        self.counter.num_fp += track_bboxes.size(0)
        if gt_ignores.size(0) > 0:
            self.counter.num_fp -= ignore_inds[track_inds].sum()
        return

    # Init
    # [N, 6]: [x1, y1, x2, y2, class, id]
    self.counter.num_gt += gt_bboxes.size(0)
    fps = torch.ones(bboxes.size(0), dtype=torch.long)
    fns = torch.ones(gt_bboxes.size(0), dtype=torch.long)
    # False negatives after tracking filter
    track_fns = torch.ones(gt_bboxes.size(0), dtype=torch.long)
    idsw = torch.zeros(track_ids.size(0), dtype=torch.long)

    # FP & FN for raw detection results
    ious = bbox_overlaps(bboxes[:, :4], gt_bboxes[:, :4])
    same_cat = labels.view(-1, 1) == gt_labels.view(1, -1)
    ious *= same_cat.float()
    max_ious, gt_inds = ious.max(dim=1)
    _, dt_inds = bboxes[:, -1].sort(descending=True)
    for dt_ind in dt_inds:
        iou, gt_ind = max_ious[dt_ind], gt_inds[dt_ind]
        if iou > 0.5 and fns[gt_ind] == 1:
            fns[gt_ind] = 0
            if ids[dt_ind] > -1:
                track_fns[gt_ind] = 0
                gt_bboxes[gt_ind, 4] = bboxes[dt_ind, -1]
            fps[dt_ind] = 0
        else:
            if len(gt_ignores) > 0 and ignore_inds[dt_ind]:
                fps[dt_ind] = 0
                gt_inds[dt_ind] = -2
            else:
                gt_inds[dt_ind] = -1

    track_gt_inds = gt_inds[track_inds]
    track_fps = fps[track_inds]

    # ID switches: a matched GT whose mapped track id changed, or a track
    # id reused for a previously unseen GT
    for i, tid in enumerate(track_ids):
        tid = int(tid)
        gt_ind = track_gt_inds[i]
        if gt_ind == -1 or gt_ind == -2:
            continue
        gt_id = int(gt_ids[gt_ind])
        if gt_id in self.id_maps.keys() and self.id_maps[gt_id] != tid:
            idsw[i] = 1
        if gt_id not in self.id_maps.keys() and \
                tid in self.id_maps.values():
            idsw[i] = 1
        self.id_maps[gt_id] = tid

    fp_inds = track_fps == 1
    fn_inds = track_fns == 1
    idsw_inds = idsw == 1
    self.counter.num_fp += fp_inds.sum()
    self.counter.num_fn += fn_inds.sum()
    self.counter.num_idsw += idsw_inds.sum()

    if show or save:
        vid_name = os.path.dirname(
            img_meta[0]['img_info']['file_name']).split('/')[-1]
        img_name = os.path.basename(img_meta[0]['img_info']['file_name'])
        img = img_meta[0]['img_info']['file_name']
        save_path = os.path.join(self.out, 'analysis', vid_name)
        os.makedirs(save_path, exist_ok=True)
        save_file = os.path.join(save_path, img_name) if save else None
        # False positives in red
        img = imshow_3d_tracklets(
            img,
            track_bboxes[fp_inds].numpy(),
            track_labels[fp_inds].numpy(),
            depths=track_depths[fp_inds].numpy()
            if depths is not None else None,
            cen_2d=track_2dcs[fp_inds].numpy()
            if cen_2ds is not None else None,
            ids=track_ids[fp_inds].numpy(),
            color='red',
            show=False)
        # False negatives in yellow
        img = imshow_3d_tracklets(
            img,
            gt_bboxes[fn_inds, :].numpy(),
            gt_labels[fn_inds].numpy(),
            depths=gt_trans[fn_inds, -1].numpy(),
            cen_2d=gt_2dcs[fn_inds, -1].numpy(),
            ids=gt_ids[fn_inds].numpy(),
            color='yellow',
            show=False)
        # ID switches in cyan
        img = imshow_3d_tracklets(
            img,
            track_bboxes[idsw_inds].numpy(),
            track_labels[idsw_inds].numpy(),
            depths=track_depths[idsw_inds].numpy()
            if depths is not None else None,
            cen_2d=track_2dcs[idsw_inds].numpy()
            if cen_2ds is not None else None,
            ids=track_ids[idsw_inds].numpy(),
            color='cyan',
            show=show,
            out_file=save_file)
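# --- Illustrative sketch (not part of the evaluator) ---
# The ID-switch rule in `analyze` keeps a gt_id -> track_id map across
# frames: a switch is flagged when a known GT shows up with a different
# track id, or when a track id is reused for a previously unseen GT.
# A standalone version of just that bookkeeping (toy ids):
def count_idsw_sketch(frames):
    # frames: list of dicts mapping gt_id -> track_id, one dict per frame
    id_maps, num_idsw = {}, 0
    for frame in frames:
        for gt_id, tid in frame.items():
            if gt_id in id_maps and id_maps[gt_id] != tid:
                num_idsw += 1
            if gt_id not in id_maps and tid in id_maps.values():
                num_idsw += 1
            id_maps[gt_id] = tid
    return num_idsw

# GT 1 switches from track 7 to track 8 in the second frame -> one IDSW
assert count_idsw_sketch([{1: 7}, {1: 8}]) == 1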