def non_max_suppression(prediction, conf_thresh=0.1, iou_thresh=0.6, merge=False,
                        agnostic=False, multi_label=True, max_det=300):
    """Performs Non-Maximum Suppression (NMS) on inference results.

    Args:
        prediction (torch.Tensor): shape=[bs, -1, no(85)].
            Note: the box coords (x, y, w, h) have already been decoded to input-image scale.
    Returns:
        A list (len=bs) whose elements have shape nx6 (x1, y1, x2, y2, conf, cls).
    """
    xc = prediction[..., 4] > conf_thresh  # candidates

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    redundant = True  # require redundant detections

    output = [None] * prediction.shape[0]  # list, len=bs
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # drop boxes whose objectness score < conf_thresh

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            # For each box keep every class whose confidence exceeds the threshold
            # (i indexes the box, j indexes the class)
            i, j = (x[:, 5:] > conf_thresh).nonzero(as_tuple=False).T
            # note: x[i, j + 5, None] keeps a trailing dim (shape (n, 1)), unlike x[i, j + 5]
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            # For each box keep only the single highest-confidence class
            conf, j = x[:, 5:].max(1, keepdim=True)
            # second filter: drop rows whose final class score < conf_thresh
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thresh]

        # Filter by class
        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Sort by confidence
        # x = x[x[:, 4].argsort(descending=True)]

        # Batched NMS
        # Offset each box by its class index so that boxes of different classes never
        # overlap; NMS (and the later weighted merge) then effectively runs per class,
        # i.e. the weighted merge mainly happens between boxes of the same class.
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # per-class offset
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = nms(boxes, scores, iou_thresh)
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            try:
                # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thresh  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            except:  # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
                print(x, i, x.shape, i.shape)
                pass

        output[xi] = x[i]

    return output
def non_max_suppression(prediction, conf_thresh=0.1, iou_thresh=0.6, merge=False,
                        agnostic=False, multi_label=True, max_det=300):
    """Performs Non-Maximum Suppression (NMS) on inference results

    Returns:
        detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
    """
    xc = prediction[..., 4] > conf_thresh  # candidates

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    redundant = True  # require redundant detections

    output = [None] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thresh).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thresh]

        # Filter by class
        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Sort by confidence
        # x = x[x[:, 4].argsort(descending=True)]

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = nms(boxes, scores, iou_thresh)
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thresh  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            except:  # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
                print(x, i, x.shape, i.shape)
                pass

        output[xi] = x[i]

    return output
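A minimal usage sketch for the non_max_suppression function above, assuming a YOLO-style prediction tensor of shape [bs, N, 85] whose boxes are already decoded to input scale. The xywh2xyxy helper and the random inputs shown here are illustrative stand-ins, not part of the original code.

# Hedged usage sketch: fake predictions, single-label NMS per image.
import torch
from torchvision.ops import nms  # referenced inside non_max_suppression

def xywh2xyxy(x):
    # convert (cx, cy, w, h) -> (x1, y1, x2, y2)
    y = x.clone()
    y[:, 0] = x[:, 0] - x[:, 2] / 2
    y[:, 1] = x[:, 1] - x[:, 3] / 2
    y[:, 2] = x[:, 0] + x[:, 2] / 2
    y[:, 3] = x[:, 1] + x[:, 3] / 2
    return y

if __name__ == '__main__':
    bs, n_anchors, n_cls = 2, 1000, 80
    pred = torch.rand(bs, n_anchors, 5 + n_cls)    # [x, y, w, h, obj, 80 class scores]
    pred[..., :2] *= 640                           # box centers anywhere in a 640x640 input
    pred[..., 2:4] = pred[..., 2:4] * 100 + 2      # widths/heights in [2, 102] pixels
    dets = non_max_suppression(pred, conf_thresh=0.25, iou_thresh=0.45, multi_label=False)
    for img_idx, d in enumerate(dets):
        n = 0 if d is None else d.shape[0]
        print(f'image {img_idx}: {n} detections (x1, y1, x2, y2, conf, cls)')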
def step(self, blob):
    """This function should be called every timestep to perform tracking with a blob
    containing the image information.
    """
    for t in self.tracks:
        # add current position to last_pos list
        t.last_pos.append(t.pos.clone())
        t.track_count += 1

    ###########################
    # Look for new detections #
    ###########################

    # self.obj_detect.load_image(blob['data'][0])

    if self.public_detections:
        dets = blob['dets'].squeeze(dim=0)
        if dets.nelement() > 0:
            boxes, scores = self.obj_detect.predict_boxes(blob['img'], dets)
        else:
            boxes = scores = torch.zeros(0).cuda()
    else:
        boxes, scores = self.obj_detect.detect(blob['img'])

    if boxes.nelement() > 0:
        boxes = clip_boxes_to_image(boxes, blob['img'].shape[-2:])
        # Filter out tracks that have too low person score
        inds = torch.gt(scores, self.detection_person_thresh).nonzero().view(-1)
    else:
        inds = torch.zeros(0).cuda()

    if inds.nelement() > 0:
        det_pos = boxes[inds]
        det_scores = scores[inds]
    else:
        det_pos = torch.zeros(0).cuda()
        det_scores = torch.zeros(0).cuda()

    ##################
    # Predict tracks #
    ##################

    num_tracks = 0
    nms_inp_reg = torch.zeros(0).cuda()
    if len(self.tracks):
        # align
        if self.do_align:
            self.align(blob)

        # apply motion model
        if self.motion_model_cfg['enabled']:
            self.motion()
            self.tracks = [t for t in self.tracks if t.has_positive_area()]

        # regress
        person_scores = self.regress_tracks(blob)

        if len(self.tracks):
            # create nms input
            # nms here if tracks overlap
            keep = nms(self.get_pos(), person_scores, self.regression_nms_thresh)
            print(f"tracks:{len(self.tracks)}\n")
            print(f"keep tracks index:{keep}\n")
            self.tracks_to_inactive([
                self.tracks[i] for i in list(range(len(self.tracks))) if i not in keep
            ])

            if keep.nelement() > 0:
                if self.do_reid:
                    new_features = self.get_appearances(blob)
                    self.add_features(new_features)

    #####################
    # Create new tracks #
    #####################

    # !!! Here NMS is used to filter out detections that are already covered by tracks. This is
    # !!! done by iterating through the active tracks one by one, assigning them a bigger score
    # !!! than 1 (maximum score for detections) and then filtering the detections with NMS.
    # !!! In the paper this is done by calculating the overlap with existing tracks, but the
    # !!! result stays the same.
    if det_pos.nelement() > 0:
        keep = nms(det_pos, det_scores, self.detection_nms_thresh)
        det_pos = det_pos[keep]
        det_scores = det_scores[keep]

        # check with every track in a single run (problem if tracks delete each other)
        for t in self.tracks:
            nms_track_pos = torch.cat([t.pos, det_pos])
            nms_track_scores = torch.cat(
                [torch.tensor([2.0]).to(det_scores.device), det_scores])
            keep = nms(nms_track_pos, nms_track_scores, self.detection_nms_thresh)

            keep = keep[torch.ge(keep, 1)] - 1

            det_pos = det_pos[keep]
            det_scores = det_scores[keep]
            if keep.nelement() == 0:
                break

    if det_pos.nelement() > 0:
        new_det_pos = det_pos
        new_det_scores = det_scores

        # try to reidentify tracks
        new_det_pos, new_det_scores, new_det_features = self.reid(
            blob, new_det_pos, new_det_scores)

        # add new
        if new_det_pos.nelement() > 0:
            self.add(new_det_pos, new_det_scores, new_det_features)

    ####################
    # Generate Results #
    ####################

    for t in self.tracks:
        if t.id not in self.results.keys():
            self.results[t.id] = {}
        self.results[t.id][self.im_index] = np.concatenate(
            [t.pos[0].cpu().numpy(), np.array([t.score])])

    for t in self.inactive_tracks:
        t.count_inactive += 1

    self.inactive_tracks = [
        t for t in self.inactive_tracks
        if t.has_positive_area() and t.count_inactive <= self.inactive_patience
    ]

    # for t in self.tracks:
    #     t.track_count += 1

    self.im_index += 1
    self.last_image = blob['img'][0]
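The comment block in step() above describes how detections already covered by an active track are removed: the track box is prepended with a dummy score of 2.0 (higher than any detection score, which is at most 1.0), NMS is run, the track itself (index 0) is dropped, and the surviving indices are shifted back to detection indices. A self-contained sketch of that pattern, with boxes and a threshold made up for illustration:

# Standalone sketch of the "filter detections already covered by a track" pattern.
import torch
from torchvision.ops import nms

track_box = torch.tensor([[100., 100., 200., 200.]])       # one active track
det_pos = torch.tensor([[105., 102., 198., 205.],          # overlaps the track -> suppressed
                        [300., 300., 380., 400.]])         # far away -> kept
det_scores = torch.tensor([0.9, 0.8])

cat_boxes = torch.cat([track_box, det_pos])
cat_scores = torch.cat([torch.tensor([2.0]), det_scores])  # track outranks every detection
keep = nms(cat_boxes, cat_scores, 0.5)                     # the track (index 0) is always kept
keep = keep[torch.ge(keep, 1)] - 1                         # drop the track, remap to detection indices
print(det_pos[keep])                                       # -> only the non-overlapping detection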
import torch
import torchvision
from torchvision.ops.boxes import nms

ld = torch.load('nms_db')
boxes = ld['boxes']
score = ld['score']
thres = 0.7
keep = nms(boxes, score, thres)
print('FINISH')
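The snippet above only replays a saved failure case ('nms_db'). As a quick sanity check of torchvision.ops.nms semantics, here is a tiny constructed example (values made up for illustration):

# Minimal illustration of torchvision.ops.nms: boxes are (x1, y1, x2, y2), scores rank
# the boxes, and the returned indices are sorted by decreasing score.
import torch
from torchvision.ops import nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],      # IoU ~0.68 with the first box
                      [20., 20., 30., 30.]])
scores = torch.tensor([0.9, 0.8, 0.7])
print(nms(boxes, scores, 0.5))   # tensor([0, 2]): the second box is suppressed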
def test_one_epoch(dataloader, model, yolo_loss, cfg):
    confidence = cfg.yolo.inf_confidence
    iou_threshold = cfg.yolo.inf_iou_threshold
    inp_dim = cfg.dataset.inp_dim
    yolo_loss.set_img_size(inp_dim)
    model.eval()
    results = []
    dset_name = dataloader.dset_name
    torch.backends.cudnn.benchmark = True
    with torch.no_grad():
        for batch_idx, (images, targets) in enumerate(dataloader):
            # measure data loading time
            images = images.to('cuda', non_blocking=True)
            targets = [{k: v.to('cuda', non_blocking=True) for k, v in t.items()}
                       for t in targets]
            out = model(images)
            predictions = yolo_loss(out)
            predictions[:, :, :4] = helper.get_abs_coord(predictions[:, :, :4])

            score = predictions[:, :, 4] * (predictions[:, :, 5:].max(axis=2)[0])
            pred_mask = score > confidence
            pred_conf = [(predictions[e][m]) for e, m in enumerate(pred_mask)]
            # per-image NMS (`boxes` is presumably the torchvision.ops.boxes module)
            indices = [
                boxes.nms(pred_conf[i][:, :4], pred_conf[i][:, 4], iou_threshold)
                for i in range(len(pred_conf))
            ]
            pred_final = [pred_conf[i][indices[i], :] for i in range(len(pred_conf))]
            pred_final = list(filter(lambda t: t.shape[0] != 0, pred_final))

            for i, atrbs in enumerate(pred_final):
                xmin = atrbs[:, 0] / inp_dim * targets[i]['img_size'][1]
                ymin = atrbs[:, 1] / inp_dim * targets[i]['img_size'][0]
                xmax = atrbs[:, 2] / inp_dim * targets[i]['img_size'][1]
                ymax = atrbs[:, 3] / inp_dim * targets[i]['img_size'][0]
                w = xmax - xmin
                h = ymax - ymin
                scores = (atrbs[:, 4] * atrbs[:, 5:].max(axis=1)[0]).tolist()
                labels = (atrbs[:, 5:].max(axis=1)[1])
                if dset_name == 'coco':
                    labels = helper.torch80_to_91(labels).tolist()
                else:
                    labels = (labels + 1).tolist()
                bboxes = torch.stack((xmin, ymin, w, h), axis=1)
                areas = (bboxes[:, 2] * bboxes[:, 3]).tolist()
                bboxes = bboxes.tolist()
                temp = [{
                    'bbox': b,
                    'area': a,
                    'category_id': l,
                    'score': s,
                    'image_id': targets[i]['image_id'].item()
                } for b, a, l, s in zip(bboxes, areas, labels, scores)]
                results = list(itertools.chain(results, temp))
    return results
def tracking_processing(self, boxes, boxes_ids, images, features, ROI_images,
                        original_image_sizes):
    device = list(self.parameters())[0].device
    # resize all the given boxes so they are aligned with the network input sizes
    tck_proposals = [
        resize_boxes(box, or_size, size)
        for box, or_size, size in zip(boxes, original_image_sizes, images.image_sizes)
        if box.nelement()
    ]
    tck_all_boxes = []
    tck_all_scores = []
    tck_all_labels = []
    tck_all_ids = []
    boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in tck_proposals]
    if tck_proposals:
        tck_box_features = self.roi_heads.box_roi_pool(features, tck_proposals,
                                                       images.image_sizes)
        tck_box_features = self.roi_heads.box_head(tck_box_features)
        tck_class_logits, tck_box_regression = self.roi_heads.box_predictor(
            tck_box_features)
        tck_boxes = self.roi_heads.box_coder.decode(tck_box_regression, tck_proposals)
        tck_scores = F.softmax(tck_class_logits, -1)
        tck_scores, tck_labels = tck_scores[:, self.selected_classes].max(1)
        tck_boxes = tck_boxes[:, self.selected_classes]
        tck_boxes = torch.cat([
            tck_boxes[idx][i].unsqueeze(0) for idx, i in enumerate(tck_labels)
        ])
        tck_boxes_list = tck_boxes.split(boxes_per_image, 0)
        tck_scores_list = tck_scores.split(boxes_per_image, 0)
        tck_labels_list = tck_labels.split(boxes_per_image, 0)

        for boxes, scores, labels, box_ids, ROI_image, image_shape, original_im_shape in zip(
                tck_boxes_list, tck_scores_list, tck_labels_list, boxes_ids, ROI_images,
                images.image_sizes, original_image_sizes):
            boxes = clip_boxes_to_image(boxes, image_shape)

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)
            box_ids = box_ids.reshape(-1)

            # remove low scoring boxes
            keep = torch.nonzero(scores > self.tck_score_thresh).squeeze(1)
            boxes = boxes[keep]
            scores = scores[keep]
            labels = labels[keep]
            box_ids = box_ids[keep]

            # remove small boxes
            keep = self.remove_small_boxes_area(boxes, min_size=self.tck_min_area)
            boxes = boxes[keep]
            scores = scores[keep]
            labels = labels[keep]
            box_ids = box_ids[keep]

            # non-maximum suppression, independently done per class
            keep = nms(boxes, scores, self.tck_nms_thresh)
            boxes = boxes[keep]
            scores = scores[keep]
            labels = labels[keep]
            box_ids = box_ids[keep]

            # keep only topk scoring predictions
            boxes = resize_boxes(boxes, image_shape, original_im_shape)
            if boxes.nelement():
                keep = self.remove_boxes_out_roi(
                    boxes, ROI_image, min_in_porcentage=self.tck_min_ROI_in)
                boxes = boxes[keep]
                scores = scores[keep]
                labels = labels[keep]
                box_ids = box_ids[keep]

            tck_all_boxes.append(boxes)
            tck_all_scores.append(scores)
            tck_all_labels.append(labels)
            tck_all_ids.append(box_ids)
    else:
        tck_all_boxes.append(torch.empty(0, device=device))
        tck_all_scores.append(torch.empty(0, device=device))
        tck_all_labels.append(torch.empty(0, device=device))
        tck_all_ids.append(torch.empty(0, device=device))

    return tck_all_boxes, tck_all_scores, tck_all_labels, tck_all_ids
def detections_processing(self, images, features, ROI_images, tck_boxes,
                          original_image_sizes):
    # rpn network
    det_proposals, proposal_losses = self.rpn(images, features)

    # roi heads to get boxes and scores
    det_box_features = self.roi_heads.box_roi_pool(features, det_proposals,
                                                   images.image_sizes)
    det_box_features = self.roi_heads.box_head(det_box_features)
    det_class_logits, det_box_regression = self.roi_heads.box_predictor(det_box_features)
    det_boxes = self.roi_heads.box_coder.decode(det_box_regression, det_proposals)
    det_scores = F.softmax(det_class_logits, -1)
    boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in det_proposals]

    # take the max score over the selected classes and use its index as the label,
    # i.e. each proposal keeps exactly one of the selected classes
    det_scores, det_labels = det_scores[:, self.selected_classes].max(1)
    det_boxes = det_boxes[:, self.selected_classes]
    det_boxes = torch.cat([
        det_boxes[idx][i].unsqueeze(0) for idx, i in enumerate(det_labels)
    ])

    # split the boxes, scores and labels per image for post processing
    det_boxes_list = det_boxes.split(boxes_per_image, 0)
    det_scores_list = det_scores.split(boxes_per_image, 0)
    det_labels_list = det_labels.split(boxes_per_image, 0)

    det_all_boxes = []
    det_all_scores = []
    det_all_labels = []
    for boxes, scores, labels, ROI_image, tck_boxes_b, image_shape, original_im_shape in zip(
            det_boxes_list, det_scores_list, det_labels_list, ROI_images, tck_boxes,
            images.image_sizes, original_image_sizes):
        boxes = clip_boxes_to_image(boxes, image_shape)

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.det_score_thresh).squeeze(1)
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

        # remove small boxes
        keep = self.remove_small_boxes_area(boxes, min_size=self.det_min_area)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # remove too big boxes
        # keep = self.remove_big_boxes_area(boxes, max_size=self.det_max_area)
        # boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # non-maximum suppression, independently done per class
        keep = nms(boxes, scores, self.det_nms_thresh)
        # boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # keep only topk scoring predictions
        keep = keep[:self.roi_heads.detections_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        boxes = resize_boxes(boxes, image_shape, original_im_shape)

        if boxes.nelement():
            keep = self.remove_boxes_out_roi(
                boxes, ROI_image, min_in_porcentage=self.det_min_ROI_in)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

        # filter out detections already covered by tracks
        for tck_box in tck_boxes_b:
            temp_boxes = torch.cat([tck_box.unsqueeze(0), boxes])
            temp_scores = torch.cat([torch.tensor([2.0]).to(boxes.device), scores])
            keep = nms(temp_boxes, temp_scores, self.det_nms_thresh)
            keep = keep[torch.ge(keep, 1)] - 1
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
            if keep.nelement() == 0:
                break

        det_all_boxes.append(boxes)
        det_all_scores.append(scores)
        det_all_labels.append(labels)

    return det_all_boxes, det_all_scores, det_all_labels
def draw_according_nid(nid):
    """Compute results for a single image; if the image was divided into several
    sub-images, merge their predictions first.
    """
    path_img = os.path.join(img_root, '{}.png'.format(nid))
    image = cv2.imread(os.path.join(img_root, '{}.png'.format(nid)), 1)
    h, w, c = image.shape

    pred_txts = [x for x in txt_names if int(x.split('_')[0]) == nid]
    pred_txts = sorted(pred_txts, key=lambda x: int(x.split('_')[1]))
    involved_ids = [int(x.split('_')[1]) for x in pred_txts]

    preds = []
    div = round(w / 500.0)
    w_ = int(w // max(1, div))
    for n in involved_ids:
        with open(os.path.join(preds_root, '{}_{}.txt'.format(nid, n)), 'r') as f:
            tmp = f.readlines()
        if len(tmp) == 0:
            pred = None
        else:
            pred = [np.array(list(map(float, e.split(' '))))[None, :] for e in tmp]
            pred = np.concatenate(pred)
            # shift boxes of sub-images back into full-image coordinates
            if n > 0:
                pred[:, 0] = pred[:, 0] + w_ * n - 100
                pred[:, 2] = pred[:, 2] + w_ * n - 100
            preds.append(pred)

    if len(preds) > 0:
        preds = np.concatenate(preds)
        pred_boxes = preds[:, :-1]
        pred_scores = np.round(preds[:, -1], 2)
        # pred_boxes = preds[:, 0:]
        # pred_scores = np.zeros((pred_boxes.shape[0], ))
    else:
        pred_boxes = None
        pred_scores = None

    pred_boxes = torch.from_numpy(pred_boxes) if pred_boxes is not None else None
    if pred_boxes is not None:
        pred_scores = torch.from_numpy(pred_scores)
        _, order = torch.sort(pred_scores, 0, True)
        cls_dets = torch.cat((pred_boxes, pred_scores[:, None]), 1)
        cls_dets = cls_dets[order]
        keep = nms(pred_boxes[order, :], pred_scores[order], 0.3)
        cls_dets = cls_dets[keep.view(-1).long()]

        if occlusion_mask:
            img_occ_mask = cv2.imread(
                os.path.join(occlusion_mask_root, '{}.png'.format(nid)), 0).astype(np.int64)
            fg_mask = cv2.imread(
                os.path.join(fg_mask_root, '{}.png'.format(nid)), 0).astype(np.int64)
            img_occ_mask = np.where(img_occ_mask > 0, 255, 0)
            img_occ_mask = np.clip(img_occ_mask - fg_mask, 0, 1).astype(np.int64)
            # drop detections whose occluded area exceeds 12% of the box area
            dets_stack = list()
            for e in cls_dets:
                if np.sum(img_occ_mask[int(e[1]):int(e[3]), int(e[0]):int(e[2])]) \
                        < 0.12 * (e[3] - e[1]) * (e[2] - e[0]):
                    dets_stack.append(e)
            cls_dets = torch.stack(dets_stack)

        pred_boxes = cls_dets[:, 0:4]
        pred_scores = cls_dets[:, 4].numpy()

        draw(path_img, saved_image, pred_boxes, pred_scores, nid)

        with open(os.path.join(saved_txt, '{}.txt'.format(nid)), 'w') as f:
            for i, e in enumerate(cls_dets.numpy()):
                f.write(' '.join(map(str, e)) + '\n')
def pipeline(img):
    '''
    Pipeline function for detection and tracking
    '''
    global frame_count
    global tracker_list
    global max_age
    global min_hits
    global track_id_list
    global debug
    global avg_fps

    frame_count += 1
    # print("")
    # print(frame_count)
    # print("")
    start = time.time()
    img_dim = (img.shape[1], img.shape[0])

    # YOLO detection for vehicles
    yolo_start = time.time()
    z_box = yolo_det.get_detected_boxes(img)
    # z_box_cpy = z_box
    yolo_end = time.time()
    # Lpd
    # print("Time taken for yolo detection is", yolo_end - yolo_start)

    track_start = time.time()
    if debug:
        print('Frame:', frame_count)

    x_box = []
    if debug:
        for i in range(len(z_box)):
            img1 = helpers.draw_box_label(img, z_box[i], box_color=(255, 0, 0))
            cv2.imshow("frame", img1)
            k = cv2.waitKey(10)
            if k == ord('e'):
                cv2.destroyAllWindows()
                sys.exit(-1)
        # plt.show()

    if len(tracker_list) > 0:
        for trk in tracker_list:
            x_box.append(trk.box)

    matched, unmatched_dets, unmatched_trks \
        = assign_detections_to_trackers(x_box, z_box, iou_thrd=0.3)

    if debug:
        print('Detection: ', z_box)
        print('x_box: ', x_box)
        print('matched:', matched)
        print('unmatched_det:', unmatched_dets)
        print('unmatched_trks:', unmatched_trks)

    # Deal with matched detections
    if matched.size > 0:
        for trk_idx, det_idx in matched:
            z = z_box[det_idx]
            tmp_trk = tracker_list[trk_idx]
            tmp_trk.features.append(extract_feature(img, z))
            z = np.expand_dims(z, axis=0).T
            tmp_trk.kalman_filter(z)
            xx = tmp_trk.x_state.T[0].tolist()
            xx = [xx[0], xx[2], xx[4], xx[6]]
            x_box[trk_idx] = xx
            tmp_trk.box = xx
            tmp_trk.hits += 1
            tmp_trk.no_losses = 0

    # Deal with unmatched detections
    if len(unmatched_dets) > 0:
        for idx in unmatched_dets:
            z = z_box[idx]
            if len(unmatched_trks) > 0:
                min_score = 10000000
                tmp_idx = -1
                for trk_idx in unmatched_trks:
                    trk = tracker_list[trk_idx]
                    # print(len(trk.features))
                    if len(trk.features) == 0:
                        continue
                    score = trk.feature_match(extract_feature(img, z))  # find closest feature match
                    if score < min_score:
                        min_score = score
                        tmp_idx = trk_idx
                if min_score < feature_thresh and tmp_idx != -1:
                    z = np.expand_dims(z, axis=0).T
                    tmp_trk = tracker_list[tmp_idx]
                    tmp_trk.kalman_filter(z)
                    xx = tmp_trk.x_state.T[0].tolist()
                    xx = [xx[0], xx[2], xx[4], xx[6]]
                    x_box[trk_idx] = xx
                    tmp_trk.box = xx
                    tmp_trk.hits += 1
                    tmp_trk.no_losses = 0
                    continue
            # new_boxes.append(z)
            z = np.expand_dims(z, axis=0).T
            tmp_trk = tr.Tracker()  # Create a new tracker
            x = np.array([[z[0], 0, z[1], 0, z[2], 0, z[3], 0]]).T
            tmp_trk.x_state = x
            tmp_trk.predict_only()
            xx = tmp_trk.x_state
            xx = xx.T[0].tolist()
            xx = [xx[0], xx[2], xx[4], xx[6]]
            tmp_trk.box = xx
            tmp_trk.id = track_id_list.popleft()  # assign an ID for the tracker
            tracker_list.append(tmp_trk)
            x_box.append(xx)

    # Deal with unmatched tracks
    if len(unmatched_trks) > 0:
        for trk_idx in unmatched_trks:
            tmp_trk = tracker_list[trk_idx]
            tmp_trk.no_losses += 1
            tmp_trk.predict_only()
            xx = tmp_trk.x_state
            xx = xx.T[0].tolist()
            xx = [xx[0], xx[2], xx[4], xx[6]]
            tmp_trk.box = xx
            x_box[trk_idx] = xx

    # The list of tracks to be annotated
    img_vis = img.copy()
    good_tracker_list = []
    # print(img_dim)
    good_boxes = []
    for trk in tracker_list:
        if (trk.hits >= min_hits) and (trk.no_losses <= max_age):
            good_tracker_list.append(trk)
            good_boxes.append(trk.box)

    # for trk in good_tracker_list:
    # suppress overlapping track boxes (all scores set to 1.0)
    selected_ids = nms(torch.FloatTensor(np.array(good_boxes)),
                       torch.FloatTensor([1.0] * len(good_boxes)), 0.45)

    for idx in selected_ids:
        trk = good_tracker_list[idx]
        x_cv2 = trk.box
        idx = trk.id
        if debug:
            print('updated box: ', x_cv2)
            print()
        # Draw the bounding box on the visualization image
        img_vis = helpers.draw_box_label(img_vis, x_cv2, idx)
        if frame_count % 5 == 0:
            y1_temp, x1_temp, y2_temp, x2_temp = x_cv2
            w_temp = x2_temp - x1_temp
            h_temp = y2_temp - y1_temp
            if w_temp * h_temp < 400 or w_temp <= 0 or h_temp <= 0 or min(x_cv2) < 0:
                continue
            plates = []
            # print(x_cv2)
            dt_start = time.time()
            Ivehicle = img[y1_temp:y2_temp, x1_temp:x2_temp]
            ratio = float(max(Ivehicle.shape[:2])) / min(Ivehicle.shape[:2])
            side = int(ratio * 288.)
            bname = 'frame{}_{}.png'.format(frame_count, idx)
            bound_dim = min(side + (side % (2**4)), size)
            # print "\t\tBound dim: %d, ratio: %f" % (bound_dim, ratio)
            # dt_plates_start = time.time()
            Llp, LlpImgs, _ = detect_lp(wpod_net, im2single(Ivehicle), bound_dim,
                                        2**4, (240, 80), lp_threshold)
            if len(LlpImgs):
                plates = [Llp[0].pts]
                cv2.imwrite("%s/%s" % (detected_plates_dir, bname), LlpImgs[0] * 255.)
                plate_string = _lpr.plates_ocr(LlpImgs[0] * 255.)
                for plate in plates:
                    x1 = (plate[0][0] * w_temp + x1_temp).astype('int')
                    y1 = (plate[1][0] * h_temp + y1_temp).astype('int')
                    x2 = (plate[0][1] * w_temp + x1_temp).astype('int')
                    y2 = (plate[1][1] * h_temp + y1_temp).astype('int')
                    x3 = (plate[0][2] * w_temp + x1_temp).astype('int')
                    y3 = (plate[1][2] * h_temp + y1_temp).astype('int')
                    x4 = (plate[0][3] * w_temp + x1_temp).astype('int')
                    y4 = (plate[1][3] * h_temp + y1_temp).astype('int')
                    plate = np.array([[x1, y1], [x2, y2], [x3, y3], [x4, y4]], np.int32)
                    plate = plate.reshape((-1, 1, 2))
                    cv2.polylines(img_vis, [plate], True, (255, 0, 0), 4)
                    cv2.putText(img_vis, plate_string, (x1, y1),
                                cv2.FONT_HERSHEY_SIMPLEX, 1.1, (0, 0, 255), 2)
                cv2.imwrite("%s/%s" % (detected_cars_dir, bname),
                            img_vis[y1_temp:y2_temp, x1_temp:x2_temp])

    track_end = time.time()
    # dt_start = time.time()
    print("Time taken to track the boxes is", track_end - track_start)
    end = time.time()
    fps = 1.0 / (end - start)
    # dt_fps = 1.0 / (dt_dr + yolo_end - yolo_start)
    avg_fps += fps
    cv2.putText(img_vis, "FPS: {:.4f}".format(fps),
                (int(0.8 * img_dim[0]), 100), cv2.FONT_HERSHEY_SIMPLEX, 1.1,
                (255, 255, 0), 4)
    # cv2.putText(img_vis, "Detect FPS: {:.4f}".format(dt_fps),
    #             (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.1, (255, 255, 0), 4)

    # Book keeping
    deleted_tracks = filter(lambda x: x.no_losses > feature_tp, tracker_list)
    for trk in deleted_tracks:
        track_id_list.append(trk.id)
    tracker_list = [x for x in tracker_list if x.no_losses <= feature_tp]

    if debug:
        print('Ending tracker_list: ', len(tracker_list))
        print('Ending good tracker_list: ', len(good_tracker_list))

    return img_vis