def alter_detections(detections: List[Detection]) -> List[Detection]:
    """Perturb ground-truth boxes to simulate a noisy detector."""
    prob_not = 0.05   # probability of dropping a detection entirely
    translation = 30  # maximum random shift of the top-left corner, in pixels
    scale = [0.5, 2]  # range of random rescaling of width and height
    prob_fp = 0.1     # probability of injecting an extra false positive
    frame_detections = []
    for d in detections:
        if random.uniform(0, 1) < prob_not:
            continue  # simulate a missed detection
        tl_x, tl_y = d.top_left
        tl_x += random.uniform(0, 1) * translation
        tl_y += random.uniform(0, 1) * translation
        width = d.width * random.uniform(scale[0], scale[1])
        height = d.height * random.uniform(scale[0], scale[1])
        frame_detections.append(
            Detection(d.id, d.label, (tl_x, tl_y), width, height))
    # Inject random false positives; each draw below prob_fp adds one more box.
    while random.uniform(0, 1) < prob_fp:
        frame_detections.append(
            Detection('', 'car',
                      (random.uniform(0, 100), random.uniform(0, 900)),
                      random.uniform(50, 150), random.uniform(50, 150)))
    return frame_detections
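# A minimal usage sketch, assuming Detection is importable from this project;
# the seed is illustrative, the box values are the ones used in main() below:
random.seed(0)  # make the perturbation reproducible
gt = [Detection(1, 'car', (995, 410), 246, 195)]
noisy = alter_detections(gt)
print(len(noisy), 'detections after perturbation')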
def _find_id(self, detection: Detection, dets_old: List[Detection]) -> None:
    if self.prev_det is None:
        return
    for detection2 in dets_old:
        if detection.iou(detection2) > INTERSECTION_THRESHOLD:
            detection.id = detection2.id
            break
def _find_id(det_new: Detection, dets_old: List[Detection], im2,
             debug: bool = False) -> None:
    for det in dets_old:
        if det_new.iou(det) > INTERSECTION_THRESHOLD:
            if debug:
                # Blue: matched previous detection; red: current detection.
                rect = patches.Rectangle((det.top_left[1], det.top_left[0]),
                                         det.height, det.width, linewidth=1,
                                         edgecolor='blue', facecolor='none')
                plt.gca().add_patch(rect)
                rect = patches.Rectangle((det_new.top_left[1], det_new.top_left[0]),
                                         det_new.height, det_new.width, linewidth=1,
                                         edgecolor='red', facecolor='none')
                plt.gca().add_patch(rect)
            det_new.id = det.id
            break
def __call__(self, frame: Frame, siamese: SiameseDB, debug=False,
             plot_number=False) -> None:
    self.debug = debug
    det1_flow = []
    if self.prev_img is not None:
        flow = self._optical_flow(frame.image)
        if debug:
            show_optical_flow_arrows(frame.image, flow)
        # Shift every previous detection by the mean flow inside its box.
        for det in self.prev_det:
            det_flow = flow[det.top_left[1]:det.top_left[1] + det.height,
                            det.top_left[0]:det.top_left[0] + det.width, :]
            accum_flow = (0, 0)
            non_zero_values = det_flow[np.logical_or(det_flow[:, :, 0] != 0,
                                                     det_flow[:, :, 1] != 0), :]
            if non_zero_values.size > 0:
                accum_flow = np.mean(non_zero_values, axis=0)
            det1_flow.append(
                Detection(det.id, det.label,
                          (int(det.top_left[0] + accum_flow[1]),
                           int(det.top_left[1] + accum_flow[0])),
                          det.width, det.height))
    # Match current detections against the flow-compensated previous ones;
    # fall back to the siamese database, then to a brand-new ID.
    for detection in frame.detections:
        self._find_id(detection, det1_flow)
        if detection.id == -1:
            if siamese is not None:
                new_id = siamese.query(frame.image, detection)
                if new_id != -1:
                    detection.id = new_id
                else:
                    detection.id = IDGenerator.next()
            else:
                detection.id = IDGenerator.next()
    self.prev_det = frame.detections
    self.prev_img = frame.image
    if debug:
        self.plot_tracking_color(frame, plot_number)
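# Hypothetical driver loop for the tracker above; the class name
# OpticalFlowTracking is an assumption, as is the iterable of Frame objects.
# prev_img and prev_det are expected to start as None:
tracker = OpticalFlowTracking()
for frame in frames:
    tracker(frame, siamese=None)  # assigns detection IDs in place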
def read_annotations(file_path: str, frames: int = 2140) -> List[List[Detection]]:
    """Read CVAT-style track annotations into per-frame detection lists."""
    frames_detections = []
    root = ET.parse(file_path).getroot()
    tracks = root.findall('track')
    for i in range(frames + 1):
        frame_detections = []
        for track in tracks:
            id_value = int(track.attrib["id"])
            label = track.attrib["label"]
            if label == 'bike':
                label = 'bicycle'
            box = track.find('box[@frame="{}"]'.format(i))
            if box is not None:
                xtl = int(float(box.attrib["xtl"]))
                ytl = int(float(box.attrib["ytl"]))
                xbr = int(float(box.attrib["xbr"]))
                ybr = int(float(box.attrib["ybr"]))
                frame_detections.append(Detection(id_value, label, (xtl, ytl),
                                                  xbr - xtl + 1, ybr - ytl + 1))
        frames_detections.append(frame_detections)
    return frames_detections
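# Usage sketch; the annotation path matches the one used by the off-the-shelf
# detectors below, the variable names are illustrative:
gt = read_annotations(
    '../datasets/AICity_data/train/S03/c010/m6-full_annotation.xml')
print('frames:', len(gt), '- boxes in frame 0:', len(gt[0]))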
def read_detections(path: str) -> List[List[Detection]]:
    # MOTChallenge-style lines: frame, id (-1 if unassigned), left, top,
    # width, height, conf, -1, -1, -1. Lines are assumed sorted by frame.
    frame_detections = []
    with open(path) as f:
        for line in f.readlines():
            parts = line.split(',')
            frame_id = int(parts[0])
            # Pad with empty lists so that an entry exists for this frame.
            while frame_id > len(frame_detections):
                frame_detections.append([])
            tl_x = int(float(parts[2]))
            tl_y = int(float(parts[3]))
            width = int(float(parts[4]))
            height = int(float(parts[5]))
            confidence = float(parts[6])
            frame_detections[-1].append(
                Detection(int(parts[1]), 'car', (tl_x, tl_y), width, height,
                          confidence=confidence))
    return frame_detections
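# Sketch of a call; the concrete det.txt path is an assumption:
dets = read_detections('../datasets/AICity_data/train/S03/c010/det/det_yolo3.txt')
print('frames with detections:', len(dets))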
def overlap_flow_tracking(optical_flow_method, im1: np.ndarray,
                          det1: List[Detection], im2: np.ndarray,
                          det2: List[Detection], debug: bool = False,
                          mot=None, gt1=None, count=0):
    feature_params = dict(maxCorners=500, qualityLevel=0.3, minDistance=7,
                          blockSize=7)
    det1_flow = []
    if im1 is not None:
        # Track good features only inside the previous detections.
        mask = np.zeros((im1.shape[0], im1.shape[1]), dtype=np.uint8)
        for det in det1:
            mask[det.top_left[0]:det.top_left[0] + det.width,
                 det.top_left[1]:det.top_left[1] + det.height] = 255
        p0 = cv2.goodFeaturesToTrack(cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY),
                                     mask=mask, **feature_params)
        flow = optical_flow_method(im1, im2, p0)
        # Shift each previous detection by the mean non-zero flow in its box.
        for det in det1:
            det_flow = flow[det.top_left[0]:det.top_left[0] + det.width,
                            det.top_left[1]:det.top_left[1] + det.height, :]
            accum_flow = np.mean(
                det_flow[np.logical_or(det_flow[:, :, 0] != 0,
                                       det_flow[:, :, 1] != 0), :], axis=0)
            if np.isnan(accum_flow).any():
                accum_flow = (0, 0)
            det1_flow.append(
                Detection(det.id, det.label,
                          (int(det.top_left[0] + accum_flow[1]),
                           int(det.top_left[1] + accum_flow[0])),
                          det.width, det.height))
    if debug:
        plt.figure(figsize=(8, 3))
        plt.subplot(1, 2, 2)
    for det in det2:
        if im1 is not None:
            _find_id(det, det1_flow, im2, debug=debug)
        if det.id == -1:
            det.id = IDGenerator.next()
    if debug:
        plt.imshow(cv2.cvtColor(im2, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.subplot(1, 2, 1)
        if det1 is not None:
            for det in det1:
                rect = patches.Rectangle((det.top_left[1], det.top_left[0]),
                                         det.height, det.width, linewidth=1,
                                         edgecolor='blue', facecolor='none')
                plt.gca().add_patch(rect)
            plt.imshow(cv2.cvtColor(im1, cv2.COLOR_BGR2RGB))
            plt.axis('off')
        plt.savefig('../video/tracking/{:04d}'.format(count))
        plt.close()
    if mot is not None and gt1 is not None:
        mot.update(det1_flow, gt1)
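# Hypothetical call sketch: `lk_flow` is an assumed wrapper around
# cv2.calcOpticalFlowPyrLK that returns the dense (H, W, 2) flow array the
# slicing above expects; new detections in curr_dets should carry id == -1
# so they receive fresh IDs:
overlap_flow_tracking(lk_flow, prev_im, prev_dets, curr_im, curr_dets)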
def main():
    im_1440 = cv2.imread(
        "../datasets/AICity_data_S03_c010_1440/frame_1440.jpg")
    top_left = [995, 410]
    width = 1241 - 995
    height = 605 - 410
    ground_truth = [Detection('', 'car', top_left, width, height)]

    # Detections from altered ground truth.
    frame = Frame(0, ground_truth)
    frame.detections = alter_detections(ground_truth)
    plot_frame(im_1440, frame)
    iou = frame.get_detection_iou()
    iou_mean = frame.get_detection_iou_mean()
    print("IoU:", iou, "IoU mean:", iou_mean)
def read_annotations(root_directory: str, start: int, end: int) -> List[List[Detection]]:
    frames_detections = []
    for i in range(start, end + 1):
        frame_path = 'frame_{:04d}.xml'.format(i)
        root = ET.parse(os.path.join(root_directory, frame_path)).getroot()
        frame_detections = []
        for obj in root.findall('object'):
            box = obj.find('bndbox')
            label = obj.find('name').text
            xmin = int(box.find('xmin').text)
            ymin = int(box.find('ymin').text)
            xmax = int(box.find('xmax').text)
            ymax = int(box.find('ymax').text)
            frame_detections.append(Detection('', label, (xmin, ymin),
                                              xmax - xmin + 1,
                                              ymax - ymin + 1))
        frames_detections.append(frame_detections)
    return frames_detections
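# Usage sketch for the per-frame Pascal-VOC-style reader; directory and frame
# range are placeholders:
gt = read_annotations('../annotations', start=0, end=10)
print('read', len(gt), 'frames of annotations')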
def off_the_shelf_yolo(tracking, debug=False, *args, **kwargs):
    video = Video("../datasets/AICity_data/train/S03/c010/frames")
    detection_transform = DetectionTransform()
    classes = utils.load_classes('../config/coco.names')
    gt = read_annotations(
        '../datasets/AICity_data/train/S03/c010/m6-full_annotation.xml')

    model = Darknet('../config/yolov3.cfg')
    model.load_weights('../weights/fine_tuned_yolo_freeze.weights')
    if torch.cuda.is_available():
        model = model.cuda()

    frames = []
    last_im = None
    model.eval()
    with torch.no_grad():
        for i, im in tqdm(enumerate(video.get_frames(start=len(video) // 4)),
                          total=len(video), file=sys.stdout, desc='Yolo'):
            im_tensor = detection_transform(im)
            im_tensor = im_tensor.view((-1,) + im_tensor.size())
            if torch.cuda.is_available():
                im_tensor = im_tensor.cuda()

            detections = model.forward(im_tensor)
            detections = utils.non_max_suppression(detections, 80,
                                                   conf_thres=.6,
                                                   nms_thres=0.3)

            frame = Frame(i + (len(video) // 4))
            frame.ground_truth = gt[frame.id]

            for d in detections[0]:
                if int(d[6]) in VALID_LABELS:
                    bbox = d.cpu().numpy()
                    det = Detection(-1, classes[int(d[6])], (bbox[0], bbox[1]),
                                    width=bbox[2] - bbox[0],
                                    height=bbox[3] - bbox[1],
                                    confidence=d[5])
                    # Map the box back from network input to image coordinates.
                    detection_transform.unshrink_detection(det)
                    frame.detections.append(det)

            if tracking is not None:
                last_frame = None if len(frames) == 0 else frames[-1]
                tracking(frame=frame, im=im, last_frame=last_frame,
                         last_im=last_im, frames=frames, debug=False)
            frames.append(frame)
            last_im = im

            if debug:
                plt.figure()
                for det in frame.detections:
                    rect = patches.Rectangle(det.top_left, det.width,
                                             det.height, linewidth=2,
                                             edgecolor='blue',
                                             facecolor='none')
                    plt.gca().add_patch(rect)
                    if tracking is None:
                        text = '{}'.format(det.label)
                    else:
                        text = '{} ~ {}'.format(det.label, det.id)
                    plt.text(det.top_left[0], det.top_left[1], s=text,
                             color='white', verticalalignment='top',
                             bbox={'color': 'blue', 'pad': 0})
                plt.imshow(im)
                plt.axis('off')
                # plt.savefig('../video/video_yolo_fine_tune_good/frame_{:04d}'.format(i))
                plt.show()
                plt.close()

    # iou_over_time(frames)
    mAP = mean_average_precision(frames)
    print("YOLO mAP:", mAP)
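# Sketch: run the YOLO pipeline without tracking (detection IDs stay -1), or
# pass any callable matching the tracking(frame=..., im=..., last_frame=...,
# last_im=..., frames=..., debug=...) signature used above:
off_the_shelf_yolo(tracking=None, debug=False)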
def off_the_shelf_ssd(tracking, debug=False, **kwargs):
    if cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

    gt = read_annotations(
        '../datasets/AICity_data/train/S03/c010/m6-full_annotation.xml')
    video = Video("../datasets/AICity_data/train/S03/c010/frames")
    trans = transforms.Compose(
        [transforms.Resize((300, 300)), transforms.ToTensor()])
    labels = (  # always index 0
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
        'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')

    model = build_ssd('test', 300, 21)  # initialize SSD
    model.load_weights('../weights/ssd300_mAP_77.43_v2.pth')
    if torch.cuda.is_available():
        model = model.cuda()

    frames = []
    model.eval()
    with torch.no_grad():
        for i, im in enumerate(video.get_frames()):
            im_tensor = trans(im)
            im_tensor = im_tensor.view((-1,) + im_tensor.size())
            if torch.cuda.is_available():
                im_tensor = im_tensor.cuda()
            output = model.forward(im_tensor)
            detections = output.data

            w = im.width
            h = im.height
            frame = Frame(i)
            frame.ground_truth = gt[frame.id]

            # Skip j = 0, because it's the background class; keep only
            # bicycle (2), bus (6), car (7) and motorbike (14).
            for j in (2, 6, 7, 14):
                dets = detections[0, j, :]
                mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, 5)
                if dets.size(0) == 0:
                    continue
                boxes = dets[:, 1:]
                scores = dets[:, 0].cpu().numpy()
                cls_dets = np.hstack(
                    (boxes.cpu().numpy(),
                     scores[:, np.newaxis])).astype(np.float32, copy=False)
                # Boxes are relative coordinates; scale them to image size.
                for cls_det in cls_dets:
                    x1 = int(w * cls_det[0])
                    y1 = int(h * cls_det[1])
                    det = Detection(-1, labels[j - 1], (x1, y1),
                                    width=w * (cls_det[2] - cls_det[0]),
                                    height=h * (cls_det[3] - cls_det[1]),
                                    confidence=cls_det[4])
                    frame.detections.append(det)

            # kalman(frame)
            if tracking is not None:
                tracking(frame, frames, debug=debug)
            frames.append(frame)

            if debug:
                plt.figure()
                for det in frame.detections:
                    rect = patches.Rectangle(det.top_left, det.width,
                                             det.height, linewidth=2,
                                             edgecolor='blue',
                                             facecolor='none')
                    plt.gca().add_patch(rect)
                    plt.text(det.top_left[0], det.top_left[1],
                             s='{} ~ {}'.format(det.label, det.id),
                             color='white', verticalalignment='top',
                             bbox={'color': 'blue', 'pad': 0})
                plt.imshow(im)
                plt.axis('off')
                # plt.savefig('../video/video_ssd_KalmanID/frame_{:04d}'.format(i))
                plt.show()
                plt.close()

    # iou_over_time(frames)
    mAP = mean_average_precision(frames)
    print("SSD mAP:", mAP)
def unshrink_detection(self, det: Detection) -> None:
    top_left = (int(det.top_left[0] * self.scale - self.pad[0]),
                int(det.top_left[1] * self.scale - self.pad[1]))
    det.top_left = top_left
    det.width = int(det.width * self.scale)
    det.height = int(det.height * self.scale)
def shrink_detection(self, det: Detection) -> None:
    top_left = (int((det.top_left[0] + self.pad[0]) / self.scale),
                int((det.top_left[1] + self.pad[1]) / self.scale))
    det.top_left = top_left
    det.width = int(det.width / self.scale)
    det.height = int(det.height / self.scale)
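# Round-trip sketch: shrink_detection maps an image-space box into the
# resized/padded network input, and unshrink_detection maps it back; with
# consistent self.scale and self.pad the two calls are inverses up to integer
# rounding. The default DetectionTransform constructor is an assumption here:
transform = DetectionTransform()
det = Detection(-1, 'car', (995, 410), 246, 195)
transform.shrink_detection(det)    # image -> network-input coordinates
transform.unshrink_detection(det)  # back to (approximately) the original box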
def _find_id(self, detection: Detection, frame_list: List[Frame]) -> None:
    # Search up to self.look_back previous frames, most recent first, and
    # reuse the ID of the first detection that overlaps enough.
    for i in range(-1, max(-self.look_back, -len(frame_list)) - 1, -1):
        for detection2 in frame_list[i].detections:
            if detection.iou(detection2) > INTERSECTION_THRESHOLD:
                detection.id = detection2.id
                return
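# Worked example of the index range above: with self.look_back = 3 and at
# least three stored frames, range(-1, max(-3, -len) - 1, -1) visits the
# frames in order -1, -2, -3, so the most recent match wins.
assert list(range(-1, max(-3, -5) - 1, -1)) == [-1, -2, -3]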