Example #1
def task1_1(save_path=None):
    reader = AICityChallengeAnnotationReader(
        path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])

    # randomly drop bounding boxes with increasing probability
    drop_values = np.linspace(0, 1, 11)
    maps = []
    for drop in drop_values:
        noise_params = {'drop': drop, 'mean': 0, 'std': 0}
        gt_noisy = reader.get_annotations(classes=['car'],
                                          noise_params=noise_params)

        y_true = []
        y_pred = []
        for frame in gt.keys():
            y_true.append(gt.get(frame))
            y_pred.append(gt_noisy.get(frame, []))

        map_score, _, _ = mean_average_precision(y_true, y_pred)
        maps.append(map_score)

    plt.plot(drop_values, maps)
    plt.xticks(drop_values)
    plt.xlabel('drop prob')
    plt.ylabel('mAP')
    if save_path is not None:
        # save before show(): once the window is closed the figure is empty
        plt.savefig(os.path.join(save_path, 'map_drop_bbox.png'))
    plt.show()

    # add noise to the size and position of bounding boxes
    std_values = np.linspace(0, 100, 11)
    maps = []
    for std in std_values:
        noise_params = {'drop': 0, 'mean': 0, 'std': std}
        gt_noisy = reader.get_annotations(classes=['car'],
                                          noise_params=noise_params)

        y_true = []
        y_pred = []
        for frame in gt.keys():
            y_true.append(gt.get(frame))
            y_pred.append(gt_noisy.get(frame, []))

        map_score, _, _ = mean_average_precision(y_true, y_pred)
        maps.append(map_score)

    plt.figure()  # fresh figure, so the curve is not drawn over the previous one
    plt.plot(std_values, maps)
    plt.xticks(std_values)
    plt.xlabel('std')
    plt.ylabel('mAP')
    if save_path is not None:
        plt.savefig(os.path.join(save_path, 'map_noisy_bbox.png'))
    plt.show()
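
For context, `noise_params` asks the reader to degrade the ground truth: `drop` is the probability of discarding a box, and `mean`/`std` parameterize Gaussian jitter on its coordinates. A minimal sketch of such noise injection (a hypothetical helper, not the project's actual reader code):

import numpy as np

def add_noise_to_boxes(boxes, drop=0.0, mean=0.0, std=0.0):
    """Randomly drop [xtl, ytl, xbr, ybr] boxes and jitter their coordinates."""
    noisy = []
    for box in boxes:
        if np.random.random() < drop:
            continue  # simulate a missed detection
        jitter = np.random.normal(mean, std, 4)  # per-coordinate Gaussian noise
        noisy.append(np.asarray(box, dtype=np.float64) + jitter)
    return noisy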
Example #2
def task2_2(debug=False,
            det_path='data/AICity_data/train/S03/c010/det/det_mask_rcnn.txt'):
    """
    Object tracking: tracking with a Kalman filter
    """

    reader = AICityChallengeAnnotationReader(
        path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])
    reader = AICityChallengeAnnotationReader(path=det_path)
    dets = reader.get_annotations(classes=['car'])

    cap = cv2.VideoCapture('data/AICity_data/train/S03/c010/vdo.avi')

    tracker = Sort()
    tracks = defaultdict(list)

    y_true = []
    y_pred = []
    acc = MOTAcumulator()
    for frame in dets.keys():
        detections = dets.get(frame, [])

        new_detections = tracker.update(
            np.array([[*d.bbox, d.score] for d in detections]))
        new_detections = [
            Detection(frame, int(d[-1]), 'car', *d[:4]) for d in new_detections
        ]

        y_true.append(gt.get(frame, []))
        y_pred.append(new_detections)

        acc.update(y_true[-1], y_pred[-1])

        if debug:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
            ret, img = cap.read()
            for d in new_detections:
                tracks[d.id].append(d.bbox)
                np.random.seed(d.id)
                color = tuple(np.random.randint(0, 256, 3).tolist())
                for dd in tracks[d.id]:
                    cv2.circle(img, (int(
                        (dd[0] + dd[2]) / 2), int((dd[1] + dd[3]) / 2)), 5,
                               color, -1)

            cv2.imshow('image', cv2.resize(img, (900, 600)))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    cap.release()
    cv2.destroyAllWindows()

    ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'])
    idf1, idp, idr = acc.get_idf1()
    print(
        f"AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, IDF1: {idf1:.4f}, IDP: {idp:.4f}, IDR: {idr:.4f}"
    )
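
`MOTAcumulator` is project code; the py-motmetrics link cited in the later examples suggests it wraps that library. A minimal, self-contained sketch of computing IDF1/IDP/IDR directly with py-motmetrics, assuming `[x, y, w, h]` boxes:

import numpy as np
import motmetrics as mm

acc = mm.MOTAccumulator(auto_id=True)

# one frame with two ground-truth objects and two hypotheses ([x, y, w, h])
gt_ids, gt_boxes = [1, 2], np.array([[10., 10., 20., 20.], [50., 50., 20., 20.]])
pred_ids, pred_boxes = [1, 2], np.array([[12., 11., 20., 20.], [51., 49., 20., 20.]])

# IoU-based distance matrix; pairs with IoU below 0.5 count as no match
dists = mm.distances.iou_matrix(gt_boxes, pred_boxes, max_iou=0.5)
acc.update(gt_ids, pred_ids, dists)

mh = mm.metrics.create()
print(mh.compute(acc, metrics=['idf1', 'idp', 'idr'], name='acc'))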
Example #3
def evaluate(model, data_loader, device, save_path=None):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'

    y_true = []
    y_pred = []
    for image, targets in metric_logger.log_every(data_loader, 100, header):
        image = list(img.to(device) for img in image)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        torch.cuda.synchronize()

        model_time = time.time()
        outputs = model(image)
        outputs = [{k: v.to(cpu_device)
                    for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time
        metric_logger.update(model_time=model_time)

        for target, output in zip(targets, outputs):
            frame = target['image_id'].item()
            y_true.append([
                Detection(frame, None, label, *box)
                for box, label in zip(target['boxes'], target['labels'])
            ])
            y_pred.append([
                Detection(frame, None, label, *box, score)
                for box, label, score in zip(output['boxes'], output['labels'],
                                             output['scores'])
            ])

    evaluator_time = time.time()
    map_score, _, _ = mean_average_precision(y_true, y_pred, sort_method='score')
    evaluator_time = time.time() - evaluator_time
    metric_logger.update(map=map_score, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)

    torch.set_num_threads(n_threads)

    if save_path:
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        with open(save_path, 'w') as f:
            for frame_dets in y_pred:
                for d in frame_dets:
                    f.write(
                        f'{d.frame}, -1, {d.xtl}, {d.ytl}, {d.width}, {d.height}, {d.score}, -1, -1, -1\n'
                    )
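
`Detection` is a project class. From its use in these examples (positional `frame, id, label, xtl, ytl, xbr, ybr`, optional `score`, plus the `bbox`, `width`, `height` and `center` accessors), it plausibly looks like this sketch:

from dataclasses import dataclass
from typing import Optional

@dataclass
class Detection:
    frame: int
    id: Optional[int]    # track id; None for raw detections
    label: str
    xtl: float           # top-left corner
    ytl: float
    xbr: float           # bottom-right corner
    ybr: float
    score: Optional[float] = None

    @property
    def bbox(self):
        return [self.xtl, self.ytl, self.xbr, self.ybr]

    @property
    def width(self):
        return self.xbr - self.xtl

    @property
    def height(self):
        return self.ybr - self.ytl

    @property
    def center(self):
        return (int((self.xtl + self.xbr) / 2), int((self.ytl + self.ybr) / 2))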
Example #4
def task1_2():
    reader = AICityChallengeAnnotationReader(
        path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])

    for detector in ['mask_rcnn', 'ssd512', 'yolo3']:
        reader = AICityChallengeAnnotationReader(
            path=f'data/AICity_data/train/S03/c010/det/det_{detector}.txt')
        det = reader.get_annotations(classes=['car'])

        y_true = []
        y_pred = []
        for frame in gt.keys():
            y_true.append(gt.get(frame))
            y_pred.append(det.get(frame, []))

        map_score, _, _ = mean_average_precision(y_true, y_pred)
        print(f'{detector} mAP: {map_score:.4f}')
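
`mean_average_precision` is project code; at its core it matches predictions to ground truth by intersection over union. For reference, a minimal IoU helper for `[xtl, ytl, xbr, ybr]` boxes:

def iou(box_a, box_b):
    """Intersection over union of two [xtl, ytl, xbr, ybr] boxes."""
    xtl = max(box_a[0], box_b[0])
    ytl = max(box_a[1], box_b[1])
    xbr = min(box_a[2], box_b[2])
    ybr = min(box_a[3], box_b[3])
    inter = max(0, xbr - xtl) * max(0, ybr - ytl)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter + 1e-9)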
Example #5
def task4(adaptive,
          random_search,
          color_space,
          channels,
          model_frac=0.25,
          save_path=None,
          min_width=120,
          max_width=800,
          min_height=100,
          max_height=600,
          debug=0):
    """
    Color modelling
    """
    n_ch = len(channels)

    # Read information
    reader = AICityChallengeAnnotationReader(
        path='data/AICity_data/train/S03/c010/gt/gt.txt')
    gt = reader.get_annotations(classes=['car'], only_not_parked=True)
    roi = cv2.imread('data/AICity_data/train/S03/c010/roi.jpg',
                     cv2.IMREAD_GRAYSCALE)

    # Model Background
    bg_model = SingleGaussianBackgroundModel(
        video_path='data/AICity_data/train/S03/c010/vdo.avi',
        color_space=color_space,
        channels=channels,
        resize=None)
    video_length = bg_model.length
    bg_model.fit(start=0, length=int(video_length * model_frac))

    # evaluation range
    start_frame = int(video_length * model_frac)
    end_frame = int(video_length)

    # hyperparameter search
    if random_search:
        alphas = np.random.choice(np.linspace(2, 4, 50), 25)
        rhos = np.random.choice(np.linspace(0.001, 0.1, 50),
                                25) if adaptive else [0]
        combinations = [(alpha, rho) for alpha, rho in zip(alphas, rhos)]
    else:
        alphas = [3.5]
        rhos = [0.005] if adaptive else [0]
        combinations = [(alpha, rho) for alpha in alphas for rho in rhos]

    for alpha, rho in combinations:
        y_true = []
        y_pred = []

        if save_path:
            gif_name = f'100_task3_alpha_{alpha}_rho_{rho}_color_{color_space}_channels_{n_ch}_{time.time()}.gif'
            writer = imageio.get_writer(os.path.join(save_path, gif_name),
                                        fps=10)

        desc = f'obtaining foreground and detecting objects. Alpha {alpha} Rho {rho}'
        for frame in trange(start_frame, end_frame, desc=desc):
            if frame == 635:  # hard-coded early stop; remove to process the whole range
                break
            frame_img, mask, _ = bg_model.evaluate(frame=frame, alpha=alpha)
            mask = mask & roi
            non_post_mask = mask
            mask = postprocess(mask)

            detections = bounding_boxes(mask, min_height, max_height,
                                        min_width, max_width, frame)
            annotations = gt.get(frame, [])

            if save_path:
                img = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)

                for det in detections:
                    cv2.rectangle(img, (det.xtl, det.ytl), (det.xbr, det.ybr),
                                  (0, 255, 0), 3)

                for det in annotations:
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                                  (int(det.xbr), int(det.ybr)), (0, 0, 255), 2)

                writer.append_data(img)

                if debug >= 1:
                    shape = (480, 270)
                    cv2.imshow('BGR Image', cv2.resize(img, shape))
                    cv2.imshow(f'Segmentation using {color_space}',
                               cv2.resize(non_post_mask, shape))
                    cv2.imshow(f'Segmentation Morphed using {color_space}',
                               cv2.resize(mask, shape))

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            y_pred.append(detections)
            y_true.append(annotations)

        cv2.destroyAllWindows()
        if save_path:
            writer.close()

        ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'])
        print(f'alpha: {alpha:.1f}, rho: {rho:.3f}, AP: {ap:.4f}')
        print(f'Precision: {prec:.4f}, Recall: {rec:.4f}')
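
`postprocess` and `bounding_boxes` are project helpers. A plausible sketch, assuming morphological cleanup and contour-based box extraction with the size limits used above (reusing the `Detection` sketch from Example #3):

import cv2
import numpy as np

def postprocess(mask, kernel_size=5):
    """Clean up a binary foreground mask with morphological opening/closing."""
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)   # remove speckle noise
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)  # fill small holes
    return mask

def bounding_boxes(mask, min_height, max_height, min_width, max_width, frame):
    """Extract size-filtered boxes from the connected components of a mask."""
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
    detections = []
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        if min_width < w < max_width and min_height < h < max_height:
            detections.append(Detection(frame, None, 'car', x, y, x + w, y + h))
    return detections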
Example #6
def task1_2(adaptive,
            random_search,
            model_frac=0.25,
            min_width=120,
            max_width=800,
            min_height=100,
            max_height=600,
            debug=0,
            save_path=None):
    reader = AICityChallengeAnnotationReader(
        path='data/AICity_data/train/S03/c010/gt/gt.txt')
    gt = reader.get_annotations(classes=['car'], only_not_parked=True)

    roi = cv2.imread('data/AICity_data/train/S03/c010/roi.jpg',
                     cv2.IMREAD_GRAYSCALE)

    bg_model = SingleGaussianBackgroundModel(
        video_path='data/AICity_data/train/S03/c010/vdo.avi')
    video_length = bg_model.length
    bg_model.fit(start=0, length=int(video_length * model_frac))

    start_frame = int(video_length * model_frac)
    end_frame = int(video_length)

    # hyperparameter search
    if random_search:
        alphas = np.random.choice(np.linspace(2, 4, 50), 25)
        rhos = np.random.choice(np.linspace(0.001, 0.1, 50),
                                25) if adaptive else [0]
        combinations = [(alpha, rho) for alpha, rho in zip(alphas, rhos)]
    else:
        alphas = [2, 2.5, 3, 3.5, 4]
        rhos = [0.005, 0.01, 0.025, 0.05, 0.1] if adaptive else [0]
        combinations = [(alpha, rho) for alpha in alphas for rho in rhos]

    for alpha, rho in combinations:
        if save_path:
            writer = imageio.get_writer(os.path.join(
                save_path, f'task1_2_alpha{alpha:.1f}_rho{rho:.3f}.gif'),
                                        fps=10)

        y_true = []
        y_pred = []
        for frame in trange(start_frame, end_frame, desc='evaluating frames'):
            _, mask, _ = bg_model.evaluate(frame=frame, alpha=alpha, rho=rho)
            mask = mask & roi
            mask = postprocess(mask)

            detections = bounding_boxes(mask, min_height, max_height,
                                        min_width, max_width, frame)

            annotations = gt.get(frame, [])

            if debug >= 1 or save_path:
                img = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
                for det in detections:
                    cv2.rectangle(img, (det.xtl, det.ytl), (det.xbr, det.ybr),
                                  (0, 255, 0), 2)
                for det in annotations:
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                                  (int(det.xbr), int(det.ybr)), (0, 0, 255), 2)

                if save_path:
                    writer.append_data(img)

                if debug >= 1:
                    cv2.imshow('result', img)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

            y_pred.append(detections)
            y_true.append(annotations)

        cv2.destroyAllWindows()

        if save_path:
            writer.close()

        ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'])
        print(f'alpha: {alpha:.1f}, rho: {rho:.3f}, AP: {ap:.4f}')
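
`SingleGaussianBackgroundModel` is project code. Conceptually, `fit` estimates a per-pixel mean and standard deviation over the first frames, and `evaluate` flags pixels that deviate from the mean by more than `alpha` standard deviations, optionally updating the model at rate `rho`. A minimal grayscale sketch of the evaluation step, under those assumptions:

import numpy as np

def evaluate_frame(img, mean, std, alpha, rho=0.0):
    """Single-Gaussian foreground segmentation with optional adaptive update."""
    img = img.astype(np.float32)
    # foreground if the pixel deviates by more than alpha standard deviations
    # (the +2 guards against near-zero variance)
    fg = np.abs(img - mean) >= alpha * (std + 2)

    if rho > 0:  # adaptive modelling: update on background pixels only
        bg = ~fg
        mean[bg] = rho * img[bg] + (1 - rho) * mean[bg]
        std[bg] = np.sqrt(rho * (img[bg] - mean[bg]) ** 2 +
                          (1 - rho) * std[bg] ** 2)

    return (fg * 255).astype(np.uint8)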
Example #7
def task3(methods,
          model_frac=0.25,
          min_width=120,
          max_width=800,
          min_height=100,
          max_height=600,
          save_path=None,
          debug=0):
    """
    Comparison with the state of the art
    """

    reader = AICityChallengeAnnotationReader(
        path='data/AICity_data/train/S03/c010/gt/gt.txt')
    gt = reader.get_annotations(classes=['car'], only_not_parked=True)

    roi = cv2.imread('data/AICity_data/train/S03/c010/roi.jpg',
                     cv2.IMREAD_GRAYSCALE)

    cap = cv2.VideoCapture('data/AICity_data/train/S03/c010/vdo.avi')
    video_length = cap.get(cv2.CAP_PROP_FRAME_COUNT)

    start_frame = int(video_length * model_frac)
    end_frame = int(video_length)

    for method in methods:
        backSub = sota_bg_subtractor(method)
        for _ in trange(start_frame, desc='modelling background'):
            ret, img = cap.read()
            backSub.apply(img)

        if save_path:
            writer = imageio.get_writer(
                os.path.join(save_path, f'task3_method_{method}.gif'), fps=10)

        y_pred = []
        y_true = []
        for frame in trange(start_frame, end_frame, desc='evaluating frames'):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
            ret, img = cap.read()

            mask = backSub.apply(img)
            mask = mask & roi
            mask = postprocess(mask)

            detections = bounding_boxes(mask, min_height, max_height,
                                        min_width, max_width, frame)
            annotations = gt.get(frame, [])

            if debug >= 1 or save_path:
                img = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
                for det in detections:
                    cv2.rectangle(img, (det.xtl, det.ytl), (det.xbr, det.ybr),
                                  (0, 255, 0), 2)
                for det in annotations:
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                                  (int(det.xbr), int(det.ybr)), (0, 0, 255), 2)

                if save_path:
                    writer.append_data(img)
                elif debug >= 1:
                    cv2.imshow('result', img)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

            y_pred.append(detections)
            y_true.append(annotations)

        cv2.destroyAllWindows()
        if save_path:
            writer.close()

        ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'])
        print(
            f'Method: {method}, AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}'
        )
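
`sota_bg_subtractor` is a project factory. A plausible mapping from method names to OpenCV's built-in background subtractors (the `bgsegm` variants need `opencv-contrib-python`):

import cv2

def sota_bg_subtractor(method):
    if method == 'MOG2':
        return cv2.createBackgroundSubtractorMOG2()
    elif method == 'KNN':
        return cv2.createBackgroundSubtractorKNN()
    elif method == 'MOG':
        return cv2.bgsegm.createBackgroundSubtractorMOG()
    elif method == 'GMG':
        return cv2.bgsegm.createBackgroundSubtractorGMG()
    else:
        raise ValueError(f'unknown method: {method}')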
Example #8
def task1_1(architecture,
            start=0,
            length=None,
            save_path='results/week3',
            gpu=0,
            visualize=False,
            save_detection='detection_results/'):
    """
    Object detection: off-the-shelf
    """

    tensor = transforms.ToTensor()

    if architecture.lower() == 'fasterrcnn':
        model = detection.fasterrcnn_resnet50_fpn(pretrained=True)

    elif architecture.lower() == 'maskrcnn':
        model = detection.maskrcnn_resnet50_fpn(pretrained=True)
    else:
        raise ValueError(architecture)
    save_path = os.path.join(save_path, architecture)

    # Read Video and prepare ground truth
    cap = cv2.VideoCapture('data/AICity_data/train/S03/c010/vdo.avi')
    if not length:
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    reader = AICityChallengeAnnotationReader(
        path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])
    gt = {frame: gt.get(frame, []) for frame in range(start, start + length)}

    # Start Inference
    # CUDA_VISIBLE_DEVICES must be set before the first CUDA call to take effect
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.eval()
    detections = {}
    y_true, y_pred = [], []

    if save_detection:
        path = os.path.join(save_detection, architecture)
        if not os.path.exists(path):
            os.makedirs(path)
        detection_file = open(f'{path}/{architecture.lower()}.txt', 'w')

    with torch.no_grad():
        for frame in range(start, start + length):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
            ret, img = cap.read()

            # Transform input to tensor
            print(f'Predict: {frame}')
            start_t = time.time()

            x = [tensor(img).to(device)]
            preds = model(x)[0]
            print(
                f'Inference time per frame: {round(time.time() - start_t, 2)}')

            # keep car predictions (label 3 is 'car' in COCO) with their confidences
            joint_preds = list(
                zip(preds['labels'], preds['boxes'], preds['scores']))
            car_det = list(filter(lambda x: x[0] == 3, joint_preds))
            # car_det = list(filter(lambda x: x[2] > 0.70, car_det))
            car_det = get_nms(car_det, 0.7)

            # add detections
            detections[frame] = []
            for det in car_det:
                det_obj = Detection(frame=frame,
                                    id=None,
                                    label='car',
                                    xtl=float(det[1][0]),
                                    ytl=float(det[1][1]),
                                    xbr=float(det[1][2]),
                                    ybr=float(det[1][3]),
                                    score=det[2])

                detections[frame].append(det_obj)

                if save_detection:
                    detection_file.write(
                        f"{frame},-1,{det_obj.xtl},{det_obj.ytl},{det_obj.width},{det_obj.height},{det_obj.score},-1,-1,-1\n"
                    )

            y_pred.append(detections[frame])
            y_true.append(gt.get(frame, []))

    ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'])
    print(
        f'Network: {architecture}, AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}'
    )

    if visualize:
        print(f'Saving result to {save_path}')
        if not os.path.exists(save_path):
            os.makedirs(save_path)

        video_iou_plot(gt,
                       detections,
                       video_path='data/AICity_data/train/S03/c010/vdo.avi',
                       title=f'{architecture} detections',
                       save_path=save_path)

    cv2.destroyAllWindows()

    if save_detection:
        detection_file.close()
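
`get_nms` is project code operating on `(label, box, score)` tuples. A plausible sketch on top of `torchvision.ops.nms`, assuming that tuple layout:

import torch
from torchvision.ops import nms

def get_nms(dets, iou_threshold):
    """Apply non-maximum suppression to a list of (label, box, score) tuples."""
    if not dets:
        return []
    boxes = torch.stack([d[1] for d in dets])           # (N, 4) xyxy boxes
    scores = torch.as_tensor([float(d[2]) for d in dets])
    keep = nms(boxes, scores, iou_threshold)            # indices of kept boxes
    return [dets[i] for i in keep]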
Example #9
def task2_1(debug=False,
            save_path=None,
            det_path='data/AICity_data/train/S03/c010/det/det_mask_rcnn.txt'):
    """
    Object tracking: tracking by overlap
    """

    cap = cv2.VideoCapture('data/AICity_data/train/S03/c010/vdo.avi')

    reader = AICityChallengeAnnotationReader(
        path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])
    reader = AICityChallengeAnnotationReader(path=det_path)
    dets = reader.get_annotations(classes=['car'])

    if save_path:
        writer = imageio.get_writer(os.path.join(save_path, 'task21.gif'),
                                    fps=10)

    accumulator = MOTAcumulator()
    y_true = []
    y_pred = []
    y_pred_refined = []
    tracks = []
    max_track = 0
    for frame in dets.keys():
        if debug or save_path:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
            ret, img = cap.read()

        detections_on_frame = dets.get(frame, [])
        tracks, frame_tracks, max_track = update_tracks_by_overlap(
            tracks, detections_on_frame, max_track)

        frame_detections = []
        for track in frame_tracks:
            det = track.last_detection()
            frame_detections.append(det)
            if debug or save_path:
                cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                              (int(det.xbr), int(det.ybr)), track.color, 2)
                cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                              (int(det.xbr), int(det.ytl) - 15), track.color,
                              -2)
                cv2.putText(img, str(det.id), (int(det.xtl), int(det.ytl)),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)
                for dd in track.detections:
                    cv2.circle(img, dd.center, 5, track.color, -1)

        y_pred_refined.append(frame_detections)
        y_pred.append(detections_on_frame)
        y_true.append(gt.get(frame, []))

        accumulator.update(y_true[-1], y_pred_refined[-1])

        if save_path:
            writer.append_data(cv2.resize(img, (600, 350)))

        elif debug:
            cv2.imshow('result', cv2.resize(img, (900, 600)))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    cv2.destroyAllWindows()
    if save_path:
        writer.close()

    ap, prec, rec = mean_average_precision(y_true,
                                           y_pred,
                                           classes=['car'],
                                           sort_method='score')
    print(f'Original AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}')
    ap, prec, rec = mean_average_precision(y_true,
                                           y_pred_refined,
                                           classes=['car'],
                                           sort_method='score')
    print(
        f'After refinement AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}'
    )
    print('\nAdditional metrics:')
    print(accumulator.get_idf1())
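
`update_tracks_by_overlap` is project code. The underlying idea is greedy IoU association: each detection is appended to the live track whose last detection overlaps it most, and unmatched detections open new tracks. A minimal sketch under that assumption, with a hypothetical `Track` stand-in and the `iou` helper sketched after Example #4:

import random
from dataclasses import dataclass, field

@dataclass
class Track:  # hypothetical stand-in for the project's Track class
    id: int
    detections: list = field(default_factory=list)
    color: tuple = field(
        default_factory=lambda: tuple(random.randint(0, 255) for _ in range(3)))

    def last_detection(self):
        return self.detections[-1]

def update_tracks_by_overlap(tracks, detections, max_track, iou_threshold=0.5):
    frame_tracks = []
    for det in detections:
        # best-overlapping live track for this detection
        best = max(tracks,
                   key=lambda t: iou(t.last_detection().bbox, det.bbox),
                   default=None)
        if best is not None and iou(best.last_detection().bbox,
                                    det.bbox) > iou_threshold:
            det.id = best.id
            best.detections.append(det)
            frame_tracks.append(best)
        else:  # no sufficient overlap: start a new track
            max_track += 1
            det.id = max_track
            new_track = Track(max_track, [det])
            tracks.append(new_track)
            frame_tracks.append(new_track)
    return tracks, frame_tracks, max_track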
Example #10
def launch_test_kalman_filter(save_path, distance_thresholds, min_track_len,
                              min_width, min_height, sequence, camera,
                              detector):
    save_video = False
    save_summary = False
    fps = 24
    os.makedirs(save_path, exist_ok=True)

    reader = AICityChallengeAnnotationReader(
        path=f'data/AICity_data/train/{sequence}/{camera}/gt/gt.txt')
    gt = reader.get_annotations(classes=['car'])
    reader = AICityChallengeAnnotationReader(
        path=f'data/AICity_data/train/{sequence}/{camera}/det/det_{detector}.txt')
    dets = reader.get_annotations(classes=['car'])

    cap = cv2.VideoCapture(f'data/AICity_data/train/{sequence}/{camera}/vdo.avi')
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    if save_video:
        writer = imageio.get_writer(
            os.path.join(save_path, f'task1_{sequence}_{camera}_{detector}.gif'),
            fps=fps)

    tracker = Sort()
    y_true = []
    tracks = []
    max_track = 0
    video_percentage = 1
    start = 0
    end = int(n_frames * video_percentage)

    for frame in trange(start, end, desc='Tracking'):

        detections_on_frame_ = dets.get(frame, [])
        detections_on_frame = []
        for d in detections_on_frame_:
            # keep detections larger than the minimum width and height
            if min_width < (d.xbr - d.xtl) and min_height < (d.ybr - d.ytl):
                detections_on_frame.append(d)

        detections_on_frame = tracker.update(
            np.array([[*d.bbox, d.score] for d in detections_on_frame]))
        detections_on_frame = [
            Detection(frame, int(d[-1]), 'car', *d[:4])
            for d in detections_on_frame
        ]

        tracks, frame_tracks, max_track = update_tracks_by_overlap(
            tracks,
            detections_on_frame,
            max_track,
            refinement=False,
            optical_flow=None)

        y_true.append(gt.get(frame, []))

    idf1s = []
    for distance_threshold in distance_thresholds:
        accumulator = MOTAcumulator()
        y_pred = []

        moving_tracks = remove_static_tracks(tracks, distance_threshold,
                                             min_track_len)
        detections = []
        for track in moving_tracks:
            detections.extend(track.detections)
        detections = group_by_frame(detections)

        for frame in trange(start, end, desc='Accumulating detections'):

            if save_video:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
                ret, img = cap.read()

                for det in y_true[frame]:
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                                  (int(det.xbr), int(det.ybr)), (0, 255, 0), 6)

            frame_detections = []
            for det in detections.get(frame, []):
                frame_detections.append(det)
                if save_video:
                    np.random.seed(det.id)  # deterministic per-track color
                    color = tuple(np.random.randint(0, 256, 3).tolist())
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                                  (int(det.xbr), int(det.ybr)), color, 6)
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                                  (int(det.xbr), int(det.ytl) - 15), color, -6)
                    cv2.putText(img, str(det.id), (int(det.xtl), int(det.ytl)),
                                cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 6)
                    cv2.circle(img, det.center, 5, color, -1)

            y_pred.append(frame_detections)

            if save_video:
                writer.append_data(cv2.resize(img, (600, 350)))

            accumulator.update(y_true[frame], y_pred[-1])

        ap, prec, rec = mean_average_precision(y_true,
                                               y_pred,
                                               classes=['car'],
                                               sort_method=None)
        print(f'AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}')
        print('Additional metrics:')
        summary = accumulator.get_idf1()
        # As mentioned in https://github.com/cheind/py-motmetrics:
        #     FAR = FalsePos / Frames * 100
        #     MOTP = (1 - MOTP) * 100
        print(summary)

        if save_summary:
            summary_name = (f'task1_{sequence}_{camera}_{detector}'
                            f'_{distance_threshold}.txt')
            with open(os.path.join(save_path, summary_name), 'w') as f:
                f.write(str(summary))

        idf1s.append(summary['idf1']['acc'] * 100)

    cv2.destroyAllWindows()
    if save_video:
        writer.close()

    return idf1s
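
`remove_static_tracks` is project code; given its arguments, it plausibly drops tracks that are too short or whose detections barely move (parked cars). A sketch under that assumption:

import numpy as np

def remove_static_tracks(tracks, distance_threshold, min_track_len):
    """Keep tracks that are long enough and whose centers actually move."""
    moving = []
    for track in tracks:
        if len(track.detections) < min_track_len:
            continue
        centers = np.array([d.center for d in track.detections], dtype=np.float32)
        # total displacement between first and last observed position
        displacement = np.linalg.norm(centers[-1] - centers[0])
        if displacement > distance_threshold:
            moving.append(track)
    return moving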
Example #11
def launch_test_optical_flow(save_path, distance_thresholds, min_track_len,
                             min_width, min_height, sequence, camera,
                             detector):
    save_video = False
    save_summary = False
    fps = 24
    os.makedirs(save_path, exist_ok=True)

    reader = AICityChallengeAnnotationReader(
        path=f'data/AICity_data/train/{sequence}/{camera}/gt/gt.txt')
    gt = reader.get_annotations(classes=['car'])
    reader = AICityChallengeAnnotationReader(
        path=f'data/AICity_data/train/{sequence}/{camera}/det/det_{detector}.txt')
    dets = reader.get_annotations(classes=['car'])

    cap = cv2.VideoCapture(f'data/AICity_data/train/{sequence}/{camera}/vdo.avi')
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    if save_video:
        writer = imageio.get_writer(
            os.path.join(save_path, f'task1_{sequence}_{camera}_{detector}.gif'),
            fps=fps)

    y_true = []
    tracks = []
    max_track = 0
    previous_frame = None
    video_percentage = 1
    start = 0
    end = int(n_frames * video_percentage)

    for frame in trange(start, end, desc='Tracking'):

        cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
        ret, img = cap.read()

        detections_on_frame_ = dets.get(frame, [])
        detections_on_frame = []
        for d in detections_on_frame_:
            # keep detections larger than the minimum width and height
            if min_width < (d.xbr - d.xtl) and min_height < (d.ybr - d.ytl):
                detections_on_frame.append(d)

        if frame == 0 or not detections_on_frame:
            optical_flow = None
        else:
            height, width = previous_frame.shape[:2]

            # get points on which to detect the flow
            points = []
            for det in detections_on_frame:
                points.append([det.xtl, det.ytl])
                points.append([det.xbr, det.ybr])
            p0 = np.array(points, dtype=np.float32)

            # params for lucas-kanade optical flow
            lk_params = dict(winSize=(15, 15),
                             maxLevel=2,
                             criteria=(cv2.TERM_CRITERIA_EPS
                                       | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

            p1, st, err = cv2.calcOpticalFlowPyrLK(previous_frame, img, p0,
                                                   None, **lk_params)

            p0 = p0.reshape((len(detections_on_frame) * 2, 2))
            p1 = p1.reshape((len(detections_on_frame) * 2, 2))
            st = st.reshape(len(detections_on_frame) * 2)

            # flow field computed by subtracting prev points from next points
            flow = p1 - p0
            flow[st == 0] = 0

            optical_flow = np.zeros((height, width, 2), dtype=np.float32)
            for jj, det in enumerate(detections_on_frame):
                optical_flow[int(det.ytl), int(det.xtl)] = flow[2 * jj]
                optical_flow[int(det.ybr), int(det.xbr)] = flow[2 * jj + 1]

        previous_frame = img.copy()

        tracks, frame_tracks, max_track = update_tracks_by_overlap(
            tracks,
            detections_on_frame,
            max_track,
            refinement=False,
            optical_flow=optical_flow)

        y_true.append(gt.get(frame, []))

    idf1s = []
    for distance_threshold in distance_thresholds:
        accumulator = MOTAcumulator()
        y_pred = []

        moving_tracks = remove_static_tracks(tracks, distance_threshold,
                                             min_track_len)
        detections = []
        for track in moving_tracks:
            detections.extend(track.detections)
        detections = group_by_frame(detections)

        for frame in trange(start, end, desc='Accumulating detections'):

            if save_video:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
                ret, img = cap.read()

                for det in y_true[frame]:
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                                  (int(det.xbr), int(det.ybr)), (0, 255, 0), 6)

            frame_detections = []
            for det in detections.get(frame, []):
                frame_detections.append(det)
                if save_video:
                    np.random.seed(det.id)  # deterministic per-track color
                    color = tuple(np.random.randint(0, 256, 3).tolist())
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                                  (int(det.xbr), int(det.ybr)), color, 6)
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                                  (int(det.xbr), int(det.ytl) - 15), color, -6)
                    cv2.putText(img, str(det.id), (int(det.xtl), int(det.ytl)),
                                cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 6)
                    cv2.circle(img, det.center, 5, color, -1)

            y_pred.append(frame_detections)

            if save_video:
                writer.append_data(cv2.resize(img, (600, 350)))

            accumulator.update(y_true[frame], y_pred[-1])

        ap, prec, rec = mean_average_precision(y_true,
                                               y_pred,
                                               classes=['car'],
                                               sort_method='score')
        print(f'AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}')
        print('Additional metrics:')
        summary = accumulator.get_idf1()
        # As mentioned in https://github.com/cheind/py-motmetrics:
        #     FAR = FalsePos / Frames * 100
        #     MOTP = (1 - MOTP) * 100
        print(summary)

        if save_summary:
            summary_name = (f'task1_{sequence}_{camera}_{detector}'
                            f'_{distance_threshold}.txt')
            with open(os.path.join(save_path, summary_name), 'w') as f:
                f.write(str(summary))

        idf1s.append(summary['idf1']['acc'] * 100)

    cv2.destroyAllWindows()
    if save_video:
        writer.close()

    return idf1s
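
`group_by_frame`, used in both `launch_test_*` functions, presumably just re-indexes a flat detection list by frame number, e.g.:

from collections import defaultdict

def group_by_frame(detections):
    """Index a flat list of detections by their frame number."""
    grouped = defaultdict(list)
    for det in detections:
        grouped[det.frame].append(det)
    return grouped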
Example #12
def task3_1(video_percentage=1):
    # Tracking with optical flow

    cap = cv2.VideoCapture('data/AICity_data/train/S03/c010/vdo.avi')
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    save_path = 'results/week4/task_31'
    os.makedirs(save_path, exist_ok=True)

    reader = AICityChallengeAnnotationReader(
        path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])
    reader = AICityChallengeAnnotationReader(
        path='data/AICity_data/train/S03/c010/det/det_mask_rcnn.txt')
    dets = reader.get_annotations(classes=['car'])

    if save_path:
        writer = imageio.get_writer(os.path.join(save_path, 'task31.gif'),
                                    fps=fps)

    accumulator = MOTAcumulator()
    y_true = []
    y_pred = []
    y_pred_refined = []
    tracks = []
    max_track = 0
    previous_frame = None
    end = int(n_frames * video_percentage)
    for i, frame in tqdm(enumerate(dets.keys())):
        if i == end:
            break

        # the frame is needed for the optical flow as well as for visualization,
        # so read it unconditionally
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
        ret, img = cap.read()

        if i == 0:
            optical_flow = None
        else:
            # note: 'detections_on_frame' still holds the previous frame's
            # detections here, so the flow is sampled at their box corners
            height, width = previous_frame.shape[:2]

            # get points on which to detect the flow
            points = []
            for det in detections_on_frame:
                points.append([det.xtl, det.ytl])
                points.append([det.xbr, det.ybr])
            p0 = np.array(points, dtype=np.float32)

            # params for lucas-kanade optical flow
            lk_params = dict(winSize=(15, 15),
                             maxLevel=2,
                             criteria=(cv2.TERM_CRITERIA_EPS
                                       | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

            p1, st, err = cv2.calcOpticalFlowPyrLK(previous_frame, img, p0,
                                                   None, **lk_params)

            p0 = p0.reshape((len(detections_on_frame) * 2, 2))
            p1 = p1.reshape((len(detections_on_frame) * 2, 2))
            st = st.reshape(len(detections_on_frame) * 2)

            # flow field computed by subtracting prev points from next points
            flow = p1 - p0
            flow[st == 0] = 0

            optical_flow = np.zeros((height, width, 2), dtype=np.float32)
            for jj, det in enumerate(detections_on_frame):
                optical_flow[int(det.ytl), int(det.xtl)] = flow[2 * jj]
                optical_flow[int(det.ybr), int(det.xbr)] = flow[2 * jj + 1]

        previous_frame = img.copy()

        detections_on_frame = dets.get(frame, [])
        tracks, frame_tracks, max_track = update_tracks_by_overlap(
            tracks,
            detections_on_frame,
            max_track,
            refinement=False,
            optical_flow=optical_flow)

        frame_detections = []
        for track in frame_tracks:
            det = track.last_detection()
            frame_detections.append(det)
            if save_path:
                cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                              (int(det.xbr), int(det.ybr)), track.color, 2)
                cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                              (int(det.xbr), int(det.ytl) - 15), track.color,
                              -2)
                cv2.putText(img, str(det.id), (int(det.xtl), int(det.ytl)),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)
                for dd in track.detections:
                    cv2.circle(img, dd.center, 5, track.color, -1)

        y_pred_refined.append(frame_detections)
        y_pred.append(detections_on_frame)
        y_true.append(gt.get(frame, []))

        accumulator.update(y_true[-1], y_pred_refined[-1])

        if save_path:
            writer.append_data(cv2.resize(img, (600, 350)))

    cv2.destroyAllWindows()
    if save_path:
        writer.close()

    ap, prec, rec = mean_average_precision(y_true,
                                           y_pred,
                                           classes=['car'],
                                           sort_method='score')
    print(f'Original AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}')
    ap, prec, rec = mean_average_precision(y_true,
                                           y_pred_refined,
                                           classes=['car'],
                                           sort_method='score')
    print(
        f'After refinement AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}'
    )
    print('\nAdditional metrics:')
    print(accumulator.get_idf1())
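
The sparse flow field built above stores a displacement only at each detection's two box corners. A plausible way `update_tracks_by_overlap` could consume it is to shift the previous box by those corner flows before IoU matching, e.g.:

def shift_box_with_flow(bbox, optical_flow):
    """Displace an [xtl, ytl, xbr, ybr] box by the flow stored at its corners."""
    xtl, ytl, xbr, ybr = bbox
    flow_tl = optical_flow[int(ytl), int(xtl)]  # (dx, dy) at the top-left corner
    flow_br = optical_flow[int(ybr), int(xbr)]  # (dx, dy) at the bottom-right corner
    return [xtl + flow_tl[0], ytl + flow_tl[1],
            xbr + flow_br[0], ybr + flow_br[1]]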