def get_background_model(video: Video, train_stop_frame: int, total_frames: int = None,
                         pixel_value: PixelValue = PixelValue.GRAY,
                         disable_tqdm=False) -> (np.ndarray, np.ndarray):
    background_list = None
    i = 0
    for im in tqdm(video.get_frames(0, train_stop_frame), total=total_frames, file=sys.stdout,
                   desc='Training model...', disable=disable_tqdm):
        if background_list is None:
            # One plane per training frame.
            background_list = np.zeros((im.shape[0], im.shape[1], train_stop_frame), dtype=np.int16)

        if pixel_value == PixelValue.GRAY:
            background_list[:, :, i] = np.mean(im, axis=-1)
        elif pixel_value == PixelValue.HSV:
            background_list[:, :, i] = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)[:, :, 0]
        else:
            raise ValueError('Unsupported pixel value: {}'.format(pixel_value))
        i += 1

    if pixel_value == PixelValue.GRAY:
        # Gray levels live in [0, 255].
        background_mean = np.mean(background_list, axis=-1) / 255
        background_std = np.std(background_list, axis=-1) / 255
    elif pixel_value == PixelValue.HSV:
        # OpenCV stores the 8-bit hue channel in [0, 180).
        background_mean = np.mean(background_list, axis=-1) / 180
        background_std = np.std(background_list, axis=-1) / 180
    else:
        raise ValueError('Unsupported pixel value: {}'.format(pixel_value))

    return background_mean, background_std
def gaussian_model(video: Video, frame_start: int, background_mean: np.ndarray,
                   background_std: np.ndarray, alpha: float = 2.5,
                   pixel_value: PixelValue = PixelValue.GRAY, total_frames: int = None,
                   disable_tqdm=False) -> Iterator[np.ndarray]:
    for im in tqdm(video.get_frames(frame_start), total=total_frames, file=sys.stdout,
                   desc="Non-adaptive gaussian model...", disable=disable_tqdm):
        if pixel_value == PixelValue.GRAY:
            im_values = np.mean(im, axis=-1) / 255
        elif pixel_value == PixelValue.HSV:
            im_values = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)[:, :, 0] / 180
        else:
            raise ValueError('Unsupported pixel value: {}'.format(pixel_value))

        # A pixel is foreground when it deviates from the background mean by more than
        # alpha * (std + small offset).
        mask = np.abs(im_values - background_mean) >= alpha * (background_std + 5 / 255)
        yield im, mask.astype(np.uint8) * 255
def gaussian_model_adaptive(video: Video, train_stop_frame: int, background_mean: np.ndarray,
                            background_std: np.ndarray, alpha: float = 2.5, rho: float = 0.1,
                            pixel_value: PixelValue = PixelValue.GRAY, total_frames: int = None,
                            disable_tqdm=False) -> Iterator[np.ndarray]:
    for im in tqdm(video.get_frames(train_stop_frame), total=total_frames, file=sys.stdout,
                   desc='Adaptive gaussian model...', disable=disable_tqdm):
        if pixel_value == PixelValue.GRAY:
            im_values = np.mean(im, axis=-1) / 255
        elif pixel_value == PixelValue.HSV:
            im_values = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)[:, :, 0] / 180
        else:
            raise ValueError('Unsupported pixel value: {}'.format(pixel_value))

        # A pixel is foreground when it deviates from the running background mean by more
        # than alpha * (std + small offset).
        mask = np.abs(im_values - background_mean) >= alpha * (background_std + 5 / 255)

        # Adaptive update of the background statistics with learning rate rho.
        background_mean = rho * im_values + (1 - rho) * background_mean
        background_std = np.sqrt(rho * np.power(im_values - background_mean, 2) +
                                 (1 - rho) * np.power(background_std, 2))

        yield im, mask.astype(np.uint8) * 255
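# Usage sketch (not part of the original module): train the background model on the
# first 25% of the AICity sequence and segment the remaining frames with the adaptive
# gaussian model. The video path and the 2141-frame count mirror values used elsewhere
# in this repository; treat them as illustrative.
def _example_adaptive_background_segmentation():
    video = Video('../datasets/AICity_data/train/S03/c010/vdo.avi')
    train_stop_frame = int(2141 * 0.25)
    background_mean, background_std = get_background_model(
        video, train_stop_frame, total_frames=train_stop_frame, pixel_value=PixelValue.GRAY)
    for im, mask in gaussian_model_adaptive(video, train_stop_frame, background_mean,
                                            background_std, alpha=2.5, rho=0.1,
                                            pixel_value=PixelValue.GRAY):
        pass  # e.g. apply morphology to `mask` and extract bounding boxes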
def stabilization(optical_flow_method, debug: bool = False, **kwargs):
    """
    Perform video stabilization using the given optical flow method.

    Idea: test some metric using a known logo. Using ORB matching, we could detect whether it moves.

    :param optical_flow_method: the optical flow method to use
    :param debug: whether to show debug plots
    """
    video = Video('../datasets/stabilization/piano')
    feature_params = dict(maxCorners=500, qualityLevel=0.3, minDistance=7, blockSize=7)
    previous_frame = None
    accum_flow = np.zeros(2)
    count = 0
    for i, frame in tqdm(enumerate(video.get_frames()), total=len(video), file=sys.stdout):
        rows, cols, _ = frame.shape
        if previous_frame is not None:
            if i % 4 == 0:
                # Re-detect corners and re-estimate the flow every fourth frame.
                p0 = cv2.goodFeaturesToTrack(cv2.cvtColor(previous_frame, cv2.COLOR_BGR2GRAY),
                                             mask=None, **feature_params)
                flow = optical_flow_method(previous_frame, frame, p0)
                if debug:
                    show_optical_flow_arrows(previous_frame, flow)

                # Per-component mean flow over pixels with non-zero motion; accumulate its
                # opposite to compensate the camera motion.
                m = np.mean(flow[np.logical_or(flow[:, :, 0] != 0, flow[:, :, 1] != 0)], axis=0)
                if not np.isnan(m).any():  # skip frames where no non-zero flow was found
                    accum_flow += -m

            # Translate the frame by the accumulated compensation.
            transform = np.float32([[1, 0, accum_flow[0]], [0, 1, accum_flow[1]]])
            frame2 = cv2.warpAffine(frame, transform, (cols, rows))

            if debug:
                plt.figure()
                plt.imshow(cv2.cvtColor(frame2, cv2.COLOR_BGR2RGB))
                plt.axis('off')
                plt.show()

            cv2.imwrite("../video/block/OriginalFrame%04d.jpg" % count, frame)  # save frame as JPEG file
            cv2.imwrite("../video/block/StabilizedFrame%04d.jpg" % count, frame2)  # save frame as JPEG file
            count += 1

        previous_frame = frame
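# Minimal sketch of an optical-flow callable compatible with stabilization(); the
# project passes its own flow implementations here, so this Farneback wrapper is only
# an assumption for illustration. It ignores the p0 corners and returns the dense
# (h, w, 2) field the caller indexes as flow[:, :, 0] / flow[:, :, 1].
def farneback_optical_flow(previous_frame, frame, p0=None):
    prev_gray = cv2.cvtColor(previous_frame, cv2.COLOR_BGR2GRAY)
    curr_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    return cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None,
                                        0.5, 3, 15, 3, 5, 1.2, 0)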
def week2_soa(video: Video, debug=False) -> Iterator[Frame]:
    th = 150
    frame_id = 0
    fgbg = cv.createBackgroundSubtractorMOG2()
    ground_truth = read_detections('../datasets/AICity_data/train/S03/c010/gt/gt.txt')
    roi = cv.cvtColor(cv.imread('../datasets/AICity_data/train/S03/c010/roi.jpg'), cv.COLOR_BGR2GRAY)

    for im in tqdm(video.get_frames(), total=2141, file=sys.stdout, desc='Training model...'):
        mask = fgbg.apply(im)
        mask[mask < th] = 0      # drop shadows and low-confidence foreground
        mask = mask & roi        # keep only pixels inside the region of interest
        mask = opening(mask, 5)  # remove small noise blobs
        if debug:
            cv.imshow('f', mask)
            cv.waitKey()
        mask = closing(mask, 25)  # fill holes inside the detected objects
        if debug:
            cv.imshow('f', mask)
            cv.waitKey()
        mask, detections = find_boxes(mask)

        frame = Frame(frame_id)
        frame.detections = detections
        frame.ground_truth = ground_truth[frame_id]
        frame_id += 1

        yield im, mask, frame
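# Usage sketch (not part of the original file): run the MOG2 baseline over the sequence
# and evaluate it with the same mAP metric used elsewhere in this repository. The
# mean_average_precision helper is assumed to be importable in this module.
def _example_week2_soa():
    video = Video('../datasets/AICity_data/train/S03/c010/vdo.avi')
    frames = []
    for im, mask, frame in week2_soa(video):
        frames.append(frame)
    print('MOG2 + morphology mAP:', mean_average_precision(frames))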
def week2_soa_mod(video: Video, debug=False) -> Iterator[Frame]:
    th = 150
    fgbg = cv.createBackgroundSubtractorMOG2()
    for im, frame in tqdm(video.get_frames(int(2141 * 0.25)), total=int(2141 * 0.75),
                          file=sys.stdout, desc='Training model...'):
        fgmask = fgbg.apply(im)
        fgmask[fgmask < th] = 0  # drop shadows and low-confidence foreground

        # Square kernels for erosion/dilation, plus an X-shaped kernel built from the
        # two diagonals of a 5x5 identity matrix.
        kernel_e = np.ones((5, 5), np.uint8)
        kernel_d = np.ones((9, 9), np.uint8)
        diag = np.identity(5)
        t_diag = np.flip(diag, 0)
        kernel_d2 = np.uint8(np.logical_or(diag, t_diag))

        fgmask = cv.erode(fgmask, kernel_e)
        fgmask = cv.dilate(fgmask, kernel_d)
        fgmask = cv.dilate(fgmask, kernel_d2)

        cv.imshow('frame', fgmask)
        k = cv.waitKey(30) & 0xff
        if k == 27:  # stop on Esc
            break

    cv.destroyAllWindows()
def main():
    video = Video("../datasets/AICity_data/train/S03/c010/vdo.avi")
    gt = read_annotations('../annotations', start_frame, end_frame)

    """ DETECTIONS """
    det_algs = ['yolo3', 'mask_rcnn', 'ssd512']
    for alg in det_algs:
        detections = read_detections(
            '../datasets/AICity_data/train/S03/c010/det/det_{0}.txt'.format(alg))
        detections = detections[start_frame:end_frame + 1]

        frames = []
        # roi = cv2.imread('../datasets/AICity_data/train/S03/c010/roi.jpg')
        for im, f in seq(video.get_frames(start_frame_number=start_frame)).take(end_frame - start_frame + 1):
            f.ground_truth = gt[f.id]
            f.detections = detections[f.id]
            frames.append(f)
            if make_video:
                make_video_frame(im, f, frames)

        iou_over_time(frames)
        mAP = mean_average_precision(frames)
        print(alg, " mAP:", mAP)

    """ DETECTIONS FROM ALTERED GROUND TRUTH """
    frames = []
    for im, f in seq(video.get_frames()).take(end_frame - start_frame + 1):
        f.ground_truth = gt[f.id]
        f.detections = alter_detections(f.ground_truth)
        frames.append(f)
        if make_video:
            make_video_frame(im, f, frames)

    iou_over_time(frames)
    mAP = mean_average_precision(frames)
    print('Random alteration', " mAP:", mAP)

    """ OPTICAL FLOW """
    of_det_1 = read_optical_flow('../datasets/optical_flow/detection/LKflow_000045_10.png')
    of_det_2 = read_optical_flow('../datasets/optical_flow/detection/LKflow_000157_10.png')
    of_gt_1 = read_optical_flow('../datasets/optical_flow/gt/000045_10.png')
    of_gt_2 = read_optical_flow('../datasets/optical_flow/gt/000157_10.png')

    img_1 = cv2.imread('../datasets/optical_flow/img/000045_10.png')
    img_2 = cv2.imread('../datasets/optical_flow/img/000157_10.png')

    show_optical_flow(of_gt_1)
    show_optical_flow_arrows(img_1, of_gt_1)

    msen_45 = msen(of_det_1, of_gt_1, plot=True)
    pepn_45 = pepn(of_det_1, of_gt_1)
    print("Sequence 045: MSEN", msen_45, "PEPN", pepn_45)

    msen_157 = msen(of_det_2, of_gt_2, plot=True)
    pepn_157 = pepn(of_det_2, of_gt_2)
    print("Sequence 157: MSEN", msen_157, "PEPN", pepn_157)
def off_the_shelf_yolo(tracking, debug=False, *args, **kwargs):
    video = Video("../datasets/AICity_data/train/S03/c010/frames")
    detection_transform = DetectionTransform()
    classes = utils.load_classes('../config/coco.names')
    gt = read_annotations('../datasets/AICity_data/train/S03/c010/m6-full_annotation.xml')

    model = Darknet('../config/yolov3.cfg')
    model.load_weights('../weights/fine_tuned_yolo_freeze.weights')
    if torch.cuda.is_available():
        model = model.cuda()

    frames = []
    last_im = None
    model.eval()
    with torch.no_grad():
        for i, im in tqdm(enumerate(video.get_frames(start=len(video) // 4)),
                          total=len(video), file=sys.stdout, desc='Yolo'):
            im_tensor = detection_transform(im)
            im_tensor = im_tensor.view((-1, ) + im_tensor.size())
            if torch.cuda.is_available():
                im_tensor = im_tensor.cuda()

            detections = model.forward(im_tensor)
            detections = utils.non_max_suppression(detections, 80, conf_thres=.6, nms_thres=0.3)

            frame = Frame(i + (len(video) // 4))
            frame.ground_truth = gt[frame.id]

            for d in detections[0]:
                if int(d[6]) in VALID_LABELS:
                    bbox = d.cpu().numpy()
                    det = Detection(-1, classes[int(d[6])], (bbox[0], bbox[1]),
                                    width=bbox[2] - bbox[0],
                                    height=bbox[3] - bbox[1],
                                    confidence=d[5])
                    detection_transform.unshrink_detection(det)
                    frame.detections.append(det)

            if tracking is not None:
                last_frame = None if len(frames) == 0 else frames[-1]
                tracking(frame=frame, im=im, last_frame=last_frame, last_im=last_im,
                         frames=frames, debug=False)

            frames.append(frame)
            last_im = im

            if debug:
                plt.figure()
                for det in frame.detections:
                    rect = patches.Rectangle(det.top_left, det.width, det.height,
                                             linewidth=2, edgecolor='blue', facecolor='none')
                    plt.gca().add_patch(rect)
                    if tracking is None:
                        text = '{}'.format(det.label)
                    else:
                        text = '{} ~ {}'.format(det.label, det.id)
                    plt.text(det.top_left[0], det.top_left[1], s=text, color='white',
                             verticalalignment='top', bbox={'color': 'blue', 'pad': 0})
                plt.imshow(im)
                plt.axis('off')
                # plt.savefig('../video/video_yolo_fine_tune_good/frame_{:04d}'.format(i))
                plt.show()
                plt.close()

    # iou_over_time(frames)
    mAP = mean_average_precision(frames)
    print("YOLO mAP:", mAP)
def off_the_shelf_ssd(tracking, debug=False, **kwargs):
    if cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

    gt = read_annotations('../datasets/AICity_data/train/S03/c010/m6-full_annotation.xml')
    video = Video("../datasets/AICity_data/train/S03/c010/frames")
    trans = transforms.Compose([transforms.Resize((300, 300)), transforms.ToTensor()])
    labels = (  # always index 0
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair',
        'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant',
        'sheep', 'sofa', 'train', 'tvmonitor')

    model = build_ssd('test', 300, 21)  # initialize SSD
    model.load_weights('../weights/ssd300_mAP_77.43_v2.pth')
    if torch.cuda.is_available():
        model = model.cuda()

    frames = []
    model.eval()
    with torch.no_grad():
        for i, im in enumerate(video.get_frames()):
            im_tensor = trans(im)
            im_tensor = im_tensor.view((-1, ) + im_tensor.size())
            if torch.cuda.is_available():
                im_tensor = im_tensor.cuda()

            output = model.forward(im_tensor)
            detections = output.data

            w = im.width
            h = im.height

            frame = Frame(i)
            frame.ground_truth = gt[frame.id]

            # skip j = 0, because it's the background class
            for j in (2, 6, 7, 14):
                dets = detections[0, j, :]
                mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, 5)
                if dets.size(0) == 0:
                    continue
                boxes = dets[:, 1:]
                scores = dets[:, 0].cpu().numpy()
                cls_dets = np.hstack((boxes.cpu().numpy(),
                                      scores[:, np.newaxis])).astype(np.float32, copy=False)
                for cls_det in cls_dets:
                    x1 = int(w * cls_det[0])
                    y1 = int(h * cls_det[1])
                    det = Detection(-1, labels[j - 1], (x1, y1),
                                    width=w * (cls_det[2] - cls_det[0]),
                                    height=h * (cls_det[3] - cls_det[1]),
                                    confidence=cls_det[4])
                    frame.detections.append(det)

            # kalman(frame)
            if tracking is not None:
                tracking(frame, frames, debug=debug)

            frames.append(frame)

            if debug:
                plt.figure()
                for det in frame.detections:
                    rect = patches.Rectangle(det.top_left, det.width, det.height,
                                             linewidth=2, edgecolor='blue', facecolor='none')
                    plt.gca().add_patch(rect)
                    plt.text(det.top_left[0], det.top_left[1],
                             s='{} ~ {}'.format(det.label, det.id), color='white',
                             verticalalignment='top', bbox={'color': 'blue', 'pad': 0})
                plt.imshow(im)
                plt.axis('off')
                # plt.savefig('../video/video_ssd_KalmanID/frame_{:04d}'.format(i))
                plt.show()
                plt.close()

    # iou_over_time(frames)
    mAP = mean_average_precision(frames)
    print("SSD mAP:", mAP)
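# Minimal sketch of a `tracking` callable accepted by off_the_shelf_ssd (the project
# ships its own overlap/Kalman trackers; this naive stand-in is an assumption for
# illustration). It propagates the id of the best-overlapping detection from the
# previous frame and assigns a fresh id otherwise.
def naive_overlap_tracking(frame, frames, debug=False):
    next_id = max((d.id for f in frames for d in f.detections), default=0) + 1
    previous = frames[-1].detections if frames else []
    for det in frame.detections:
        best, best_iou = None, 0.0
        for prev in previous:
            # Intersection-over-union between the two axis-aligned boxes.
            x1 = max(det.top_left[0], prev.top_left[0])
            y1 = max(det.top_left[1], prev.top_left[1])
            x2 = min(det.top_left[0] + det.width, prev.top_left[0] + prev.width)
            y2 = min(det.top_left[1] + det.height, prev.top_left[1] + prev.height)
            inter = max(0, x2 - x1) * max(0, y2 - y1)
            union = det.width * det.height + prev.width * prev.height - inter
            iou = inter / union if union > 0 else 0.0
            if iou > best_iou:
                best, best_iou = prev, iou
        if best is not None and best_iou > 0.5:
            det.id = best.id
        else:
            det.id = next_id
            next_id += 1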