def get_background_model(video: Video,
                         train_stop_frame: int,
                         total_frames: int = None,
                         pixel_value: PixelValue = PixelValue.GRAY,
                         disable_tqdm=False) -> Tuple[np.ndarray, np.ndarray]:
    background_list = None
    i = 0
    for im in tqdm(video.get_frames(0, train_stop_frame),
                   total=total_frames,
                   file=sys.stdout,
                   desc='Training model...',
                   disable=disable_tqdm):
        if background_list is None:
            background_list = np.zeros(
                (im.shape[0], im.shape[1], train_stop_frame), dtype=np.int16)

        if pixel_value == PixelValue.GRAY:
            background_list[:, :, i] = np.mean(im, axis=-1)
        elif pixel_value == PixelValue.HSV:
            # Use the hue channel for the HSV background model
            background_list[:, :, i] = cv2.cvtColor(im,
                                                    cv2.COLOR_BGR2HSV)[:, :, 0]
        else:
            raise ValueError('Unsupported pixel value: {}'.format(pixel_value))
        i += 1

    if pixel_value == PixelValue.GRAY:
        background_mean = np.mean(background_list, axis=-1) / 255
        background_std = np.std(background_list, axis=-1) / 255
    elif pixel_value == PixelValue.HSV:
        # Hue values range from 0 to 179 in OpenCV
        background_mean = np.mean(background_list, axis=-1) / 180
        background_std = np.std(background_list, axis=-1) / 180
    else:
        raise ValueError('Unsupported pixel value: {}'.format(pixel_value))

    return background_mean, background_std
def gaussian_model(video: Video,
                   frame_start: int,
                   background_mean: np.ndarray,
                   background_std: np.ndarray,
                   alpha: float = 2.5,
                   pixel_value: PixelValue = PixelValue.GRAY,
                   total_frames: int = None,
                   disable_tqdm=False) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
    for im in tqdm(video.get_frames(frame_start),
                   total=total_frames,
                   file=sys.stdout,
                   desc="Non-adaptive gaussian model...",
                   disable=disable_tqdm):

        if pixel_value == PixelValue.GRAY:
            im_values = np.mean(im, axis=-1) / 255
        elif pixel_value == PixelValue.HSV:
            im_values = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)[:, :, 0] / 180
        else:
            raise ValueError('Unsupported pixel value: {}'.format(pixel_value))

        # Pixel is foreground if it deviates from the background mean by more
        # than alpha standard deviations (plus a small constant)
        mask = np.abs(im_values - background_mean) >= (alpha *
                                                       (background_std +
                                                        (5 / 255)))

        yield im, mask.astype(np.uint8) * 255
def gaussian_model_adaptive(video: Video,
                            train_stop_frame: int,
                            background_mean: np.ndarray,
                            background_std: np.ndarray,
                            alpha: float = 2.5,
                            rho: float = 0.1,
                            pixel_value: PixelValue = PixelValue.GRAY,
                            total_frames: int = None,
                            disable_tqdm=False) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
    for im in tqdm(video.get_frames(train_stop_frame),
                   total=total_frames,
                   file=sys.stdout,
                   desc='Adaptive gaussian model...',
                   disable=disable_tqdm):

        if pixel_value == PixelValue.GRAY:
            im_values = np.mean(im, axis=-1) / 255
        elif pixel_value == PixelValue.HSV:
            im_values = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)[:, :, 0] / 180
        else:
            raise ValueError('Unsupported pixel value: {}'.format(pixel_value))

        # Classify foreground pixels, then update the background model with
        # the current frame (running mean and variance with learning rate rho)
        mask = np.abs(im_values -
                      background_mean) >= (alpha * (background_std + 5 / 255))
        background_mean = rho * im_values + (1 - rho) * background_mean
        background_std = np.sqrt(rho *
                                 np.power((im_values - background_mean), 2) +
                                 (1 - rho) * np.power(background_std, 2))

        yield im, mask.astype(np.uint8) * 255
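
A minimal usage sketch for the two functions above: train the per-pixel Gaussian background model on the first part of the sequence, then segment the remaining frames. The names video_path and train_split are illustrative assumptions, not part of the original code.

# Hedged usage sketch, assuming Video exposes len() and get_frames() as in the
# surrounding examples; video_path and train_split are hypothetical names.
video = Video(video_path)
train_stop = int(len(video) * train_split)

background_mean, background_std = get_background_model(
    video, train_stop,
    total_frames=train_stop,
    pixel_value=PixelValue.GRAY)

for im, mask in gaussian_model(video, train_stop,
                               background_mean, background_std,
                               alpha=2.5,
                               pixel_value=PixelValue.GRAY,
                               total_frames=len(video) - train_stop):
    pass  # e.g. post-process the mask and extract bounding boxes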
Example #4
def stabilization(optical_flow_method, debug: bool = False, **kwargs):
    """
    Perform video stabilization using the given optical flow method.

    Idea: evaluate the stabilization with a metric based on a known, static logo in the
    scene; ORB feature matching could detect whether it moves between frames (a sketch
    of this idea follows the function).

    :param optical_flow_method: the optical flow method to use
    :param debug: whether to show debug plots
    """
    video = Video('../datasets/stabilization/piano')
    feature_params = dict(maxCorners=500,
                          qualityLevel=0.3,
                          minDistance=7,
                          blockSize=7)
    previous_frame = None
    accum_flow = np.zeros(2)
    count = 0
    for i, frame in tqdm(enumerate(video.get_frames()),
                         total=len(video),
                         file=sys.stdout):
        rows, cols, _ = frame.shape
        if previous_frame is not None:
            if i % 4 == 0:
                p0 = cv2.goodFeaturesToTrack(cv2.cvtColor(
                    previous_frame, cv2.COLOR_BGR2GRAY),
                                             mask=None,
                                             **feature_params)
                flow = optical_flow_method(previous_frame, frame, p0)
                if debug:
                    show_optical_flow_arrows(previous_frame, flow)

                # Mean (dx, dy) over pixels with non-zero flow; axis=0 keeps
                # the two components separate
                m = np.mean(flow[np.logical_or(flow[:, :, 0] != 0,
                                               flow[:, :, 1] != 0)],
                            axis=0)
                # m is NaN when every flow vector is zero; skip the update then
                if not np.isnan(m).any():
                    accum_flow += -m
                transform = np.float32([[1, 0, accum_flow[0]],
                                        [0, 1, accum_flow[1]]])
                frame2 = cv2.warpAffine(frame, transform, (cols, rows))

                if debug:
                    plt.figure()
                    plt.imshow(cv2.cvtColor(frame2, cv2.COLOR_BGR2RGB))
                    plt.axis('off')
                    plt.show()
                cv2.imwrite("../video/block/OrigianlFrame%04d.jpg" % count,
                            frame)  # save frame as JPEG file
                cv2.imwrite("../video/block/StabilizedFrame%04d.jpg" % count,
                            frame2)  # save frame as JPEG file

                count += 1
        previous_frame = frame
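
The docstring above suggests measuring stabilization quality by checking whether a known, static logo moves between frames using ORB matching. A minimal, hedged sketch of that idea follows; logo_drift and the logo template are illustrative and not part of the original code, while cv2.ORB_create and cv2.BFMatcher are standard OpenCV APIs.

# Hedged sketch of the ORB-based metric: match a known logo template against a
# frame and report how far its matched keypoints have drifted.
import cv2
import numpy as np

def logo_drift(logo: np.ndarray, frame: np.ndarray) -> float:
    orb = cv2.ORB_create()
    kp_logo, des_logo = orb.detectAndCompute(logo, None)
    kp_frame, des_frame = orb.detectAndCompute(frame, None)
    if des_logo is None or des_frame is None:
        return float('nan')
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = sorted(matcher.match(des_logo, des_frame),
                     key=lambda m: m.distance)[:20]
    if not matches:
        return float('nan')
    # Mean displacement of the matched keypoints; in a well-stabilized video
    # this value should stay roughly constant over time.
    shifts = [np.subtract(kp_frame[m.trainIdx].pt, kp_logo[m.queryIdx].pt)
              for m in matches]
    return float(np.linalg.norm(np.mean(shifts, axis=0)))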
Example #5
def week2_soa(video: Video, debug=False) -> Iterator[Frame]:
    th = 150
    frame_id = 0
    fgbg = cv.createBackgroundSubtractorMOG2()

    ground_truth = read_detections(
        '../datasets/AICity_data/train/S03/c010/gt/gt.txt')
    roi = cv.cvtColor(
        cv.imread('../datasets/AICity_data/train/S03/c010/roi.jpg'),
        cv.COLOR_BGR2GRAY)

    for im in tqdm(video.get_frames(),
                   total=2141,
                   file=sys.stdout,
                   desc='Training model...'):
        mask = fgbg.apply(im)
        # Suppress shadows and low-confidence pixels, then binarize the mask
        mask[mask < th] = 0
        mask = (mask > 0).astype(np.uint8) * 255

        # Keep only the pixels inside the region of interest
        mask = mask & roi

        mask = opening(mask, 5)
        # cv.imshow('f', mask)
        # cv.waitKey()

        mask = closing(mask, 25)
        # cv.imshow('f', mask)
        # cv.waitKey()

        mask, detections = find_boxes(mask)

        frame = Frame(frame_id)
        frame.detections = detections
        frame.ground_truth = ground_truth[frame_id]

        frame_id += 1

        yield im, mask, frame
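
The opening and closing helpers used above are not defined in this example. A plausible sketch, assuming they wrap cv2.morphologyEx with a square structuring element whose side is the given size:

# Hedged sketch of the morphology helpers assumed by week2_soa
import cv2 as cv
import numpy as np

def opening(mask: np.ndarray, kernel_size: int) -> np.ndarray:
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    # Erosion followed by dilation: removes small noise blobs
    return cv.morphologyEx(mask, cv.MORPH_OPEN, kernel)

def closing(mask: np.ndarray, kernel_size: int) -> np.ndarray:
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    # Dilation followed by erosion: fills small holes inside foreground regions
    return cv.morphologyEx(mask, cv.MORPH_CLOSE, kernel)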
Example #6
def week2_soa_mod(video: Video, debug=False) -> Iterator[Frame]:
    th = 150
    fgbg = cv.createBackgroundSubtractorMOG2()
    # Skip the first 25% of the frames, which are used for training elsewhere
    for im in tqdm(video.get_frames(int(2141 * 0.25)),
                   total=int(2141 * 0.75),
                   file=sys.stdout,
                   desc='Training model...'):

        fgmask = fgbg.apply(im)
        fgmask[fgmask < th] = 0
        kernel_e = np.ones((5, 5), np.uint8)
        kernel_d = np.ones((9, 9), np.uint8)
        diag = np.identity(5)
        t_diag = np.flip(diag, 0)
        kernel_d2 = np.uint8(np.logical_or(diag, t_diag))
        fgmask = cv.erode(fgmask, kernel_e)
        fgmask = cv.dilate(fgmask, kernel_d)
        fgmask = cv.dilate(fgmask, kernel_d2)
        cv.imshow('frame', fgmask)
        k = cv.waitKey(30) & 0xff
        if k == 27:
            break
    cv.destroyAllWindows()
Example #7
def main():
    video = Video("../datasets/AICity_data/train/S03/c010/vdo.avi")

    gt = read_annotations('../annotations', start_frame, end_frame)
    """
        DETECTIONS
    """
    det_algs = ['yolo3', 'mask_rcnn', 'ssd512']
    for alg in det_algs:
        detections = read_detections(
            '../datasets/AICity_data/train/S03/c010/det/det_{0}.txt'.format(
                alg))
        detections = detections[start_frame:end_frame + 1]

        frames = []

        # roi = cv2.imread('../datasets/AICity_data/train/S03/c010/roi.jpg')

        for im, f in seq(video.get_frames(
                start_frame_number=start_frame)).take(end_frame - start_frame +
                                                      1):
            f.ground_truth = gt[f.id]
            f.detections = detections[f.id]
            frames.append(f)

            if make_video:
                make_video_frame(im, f, frames)

        iou_over_time(frames)
        mAP = mean_average_precision(frames)
        print(alg, " mAP:", mAP)
    """
        DETECTIONS FROM ALTERED GROUND TRUTH 
    """
    frames = []

    for im, f in seq(video.get_frames()).take(end_frame - start_frame + 1):
        f.ground_truth = gt[f.id]
        f.detections = alter_detections(f.ground_truth)
        frames.append(f)

        if make_video:
            make_video_frame(im, f, frames)

    iou_over_time(frames)
    mAP = mean_average_precision(frames)
    print('Random alteration', " mAP:", mAP)
    """
        OPTICAL FLOW 
    """
    of_det_1 = read_optical_flow(
        '../datasets/optical_flow/detection/LKflow_000045_10.png')
    of_det_2 = read_optical_flow(
        '../datasets/optical_flow/detection/LKflow_000157_10.png')

    of_gt_1 = read_optical_flow('../datasets/optical_flow/gt/000045_10.png')
    of_gt_2 = read_optical_flow('../datasets/optical_flow/gt/000157_10.png')

    img_1 = cv2.imread('../datasets/optical_flow/img/000045_10.png')
    img_2 = cv2.imread('../datasets/optical_flow/img/000157_10.png')

    msen_of = msen(of_det_2, of_gt_2)
    pepn_of = pepn(of_det_2, of_gt_2)

    print(msen_of, pepn_of)
    show_optical_flow(of_gt_1)
    show_optical_flow_arrows(img_1, of_gt_1)

    msen_45 = msen(of_det_1, of_gt_1, plot=True)
    pepn_45 = pepn(of_det_1, of_gt_1)
    print("Sequence 045: MSEN", msen_45, "PEPN", pepn_45)

    msen_157 = msen(of_det_2, of_gt_2, plot=True)
    pepn_157 = pepn(of_det_2, of_gt_2)
    print("Sequence 157: MSEN", msen_157, "PEPN", pepn_157)

    show_optical_flow(of_gt_1)
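
The msen and pepn metrics used above follow the usual KITTI-style optical-flow evaluation. A minimal sketch, assuming read_optical_flow returns an (H, W, 3) array with (u, v, valid) channels and that PEPN uses the common 3-pixel error threshold; the original msen also accepts a plot flag, omitted here.

# Hedged sketch of MSEN / PEPN over valid (non-occluded) pixels
import numpy as np

def endpoint_error(det: np.ndarray, gt: np.ndarray) -> np.ndarray:
    valid = gt[:, :, 2] != 0
    err = np.sqrt(np.sum((det[:, :, :2] - gt[:, :, :2]) ** 2, axis=-1))
    return err[valid]

def msen(det: np.ndarray, gt: np.ndarray) -> float:
    # Mean squared motion-vector error in valid pixels
    return float(np.mean(endpoint_error(det, gt) ** 2))

def pepn(det: np.ndarray, gt: np.ndarray, th: float = 3.0) -> float:
    # Percentage of valid pixels whose endpoint error exceeds th
    return float(np.mean(endpoint_error(det, gt) > th) * 100)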
Example #8
def off_the_shelf_yolo(tracking, debug=False, *args, **kwargs):
    video = Video("../datasets/AICity_data/train/S03/c010/frames")
    detection_transform = DetectionTransform()
    classes = utils.load_classes('../config/coco.names')
    gt = read_annotations(
        '../datasets/AICity_data/train/S03/c010/m6-full_annotation.xml')

    model = Darknet('../config/yolov3.cfg')
    model.load_weights('../weights/fine_tuned_yolo_freeze.weights')
    if torch.cuda.is_available():
        model = model.cuda()

    frames = []
    last_im = None

    model.eval()
    with torch.no_grad():
        for i, im in tqdm(enumerate(video.get_frames(start=len(video) // 4)),
                          total=len(video) - len(video) // 4,
                          file=sys.stdout,
                          desc='Yolo'):
            im_tensor = detection_transform(im)

            im_tensor = im_tensor.view((-1, ) + im_tensor.size())
            if torch.cuda.is_available():
                im_tensor = im_tensor.cuda()

            detections = model.forward(im_tensor)
            detections = utils.non_max_suppression(detections,
                                                   80,
                                                   conf_thres=.6,
                                                   nms_thres=0.3)
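            # Each row of detections[0] is
            # (x1, y1, x2, y2, objectness, class confidence, class index)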

            frame = Frame(i + (len(video) // 4))
            frame.ground_truth = gt[frame.id]

            for d in detections[0]:
                if int(d[6]) in VALID_LABELS:
                    bbox = d.cpu().numpy()
                    det = Detection(-1,
                                    classes[int(d[6])], (bbox[0], bbox[1]),
                                    width=bbox[2] - bbox[0],
                                    height=bbox[3] - bbox[1],
                                    confidence=d[5])
                    detection_transform.unshrink_detection(det)
                    frame.detections.append(det)

            if tracking is not None:
                last_frame = None if len(frames) == 0 else frames[-1]
                tracking(frame=frame,
                         im=im,
                         last_frame=last_frame,
                         last_im=last_im,
                         frames=frames,
                         debug=False)

            frames.append(frame)

            last_im = im

            if debug:
                plt.figure()
                for det in frame.detections:
                    rect = patches.Rectangle(det.top_left,
                                             det.width,
                                             det.height,
                                             linewidth=2,
                                             edgecolor='blue',
                                             facecolor='none')
                    plt.gca().add_patch(rect)
                    if tracking is None:
                        text = '{}'.format(det.label)
                    else:
                        text = '{} ~ {}'.format(det.label, det.id)
                    plt.text(det.top_left[0],
                             det.top_left[1],
                             s=text,
                             color='white',
                             verticalalignment='top',
                             bbox={
                                 'color': 'blue',
                                 'pad': 0
                             })
                plt.imshow(im)
                plt.axis('off')
                # plt.savefig('../video/video_yolo_fine_tune_good/frame_{:04d}'.format(i))
                plt.show()
                plt.close()
        # iou_over_time(frames)
        mAP = mean_average_precision(frames)
        print("YOLO mAP:", mAP)
def off_the_shelf_ssd(tracking, debug=False, **kwargs):
    if cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    gt = read_annotations(
        '../datasets/AICity_data/train/S03/c010/m6-full_annotation.xml')
    video = Video("../datasets/AICity_data/train/S03/c010/frames")
    trans = transforms.Compose(
        [transforms.Resize((300, 300)),
         transforms.ToTensor()])

    labels = (  # VOC class names; class index 0 in the model is the background
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
        'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')

    model = build_ssd('test', 300, 21)  # initialize SSD
    model.load_weights('../weights/ssd300_mAP_77.43_v2.pth')
    if torch.cuda.is_available():
        model = model.cuda()

    frames = []

    model.eval()
    with torch.no_grad():
        for i, im in enumerate(video.get_frames()):

            im_tensor = trans(im)
            im_tensor = im_tensor.view((-1, ) + im_tensor.size())
            if torch.cuda.is_available():
                im_tensor = im_tensor.cuda()

            output = model.forward(im_tensor)
            detections = output.data
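            # detections has shape [batch, num_classes, top_k, 5]; each row is
            # (score, x1, y1, x2, y2) with coordinates relative to the image size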

            w = im.width
            h = im.height
            frame = Frame(i)

            frame.ground_truth = gt[frame.id]

            # j = 0 is the background class; keep only bicycle (2), bus (6),
            # car (7) and motorbike (14)
            for j in (2, 6, 7, 14):
                dets = detections[0, j, :]
                mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, 5)
                if dets.size(0) == 0:
                    continue
                boxes = dets[:, 1:]
                scores = dets[:, 0].cpu().numpy()
                cls_dets = np.hstack((boxes.cpu().numpy(),
                                      scores[:,
                                             np.newaxis])).astype(np.float32,
                                                                  copy=False)
                for cls_det in cls_dets:
                    x1 = int(w * cls_det[0])
                    y1 = int(h * cls_det[1])
                    det = Detection(-1,
                                    labels[j - 1], (x1, y1),
                                    width=w * (cls_det[2] - cls_det[0]),
                                    height=h * (cls_det[3] - cls_det[1]),
                                    confidence=cls_det[4])
                    frame.detections.append(det)

            # kalman(frame)
            if tracking is not None:
                tracking(frame, frames, debug=debug)
            frames.append(frame)

            if debug:
                plt.figure()
                for det in frame.detections:
                    rect = patches.Rectangle(det.top_left,
                                             det.width,
                                             det.height,
                                             linewidth=2,
                                             edgecolor='blue',
                                             facecolor='none')
                    plt.gca().add_patch(rect)
                    plt.text(det.top_left[0],
                             det.top_left[1],
                             s='{} ~ {}'.format(det.label, det.id),
                             color='white',
                             verticalalignment='top',
                             bbox={
                                 'color': 'blue',
                                 'pad': 0
                             })
                plt.imshow(im)
                plt.axis('off')
                # plt.savefig('../video/video_ssd_KalmanID/frame_{:04d}'.format(i))
                plt.show()
                plt.close()

        #iou_over_time(frames)
        mAP = mean_average_precision(frames)
        print("SSD mAP:", mAP)