Example #1
import random
from typing import List

def alter_detections(detections: List[Detection]) -> List[Detection]:
    prob_not = 0.05   # probability of dropping a detection (simulated false negative)
    translation = 30  # maximum random shift of the top-left corner, in pixels
    scale = [0.5, 2]  # range for random rescaling of width and height
    prob_fp = 0.1     # probability of injecting a random false positive after each detection

    frame_detections = []

    for d in detections:
        if random.uniform(0, 1) < prob_not:
            continue

        tl_x, tl_y = d.top_left
        tl_x += random.uniform(0, 1) * translation
        tl_y += random.uniform(0, 1) * translation
        width = d.width * random.uniform(scale[0], scale[1])
        height = d.height * random.uniform(scale[0], scale[1])

        frame_detections.append(
            Detection(d.id, d.label, (tl_x, tl_y), width, height))
        while random.uniform(0, 1) < prob_fp:
            frame_detections.append(
                Detection('', 'car',
                          (random.uniform(0, 100), random.uniform(0, 900)),
                          random.uniform(50, 150), random.uniform(50, 150)))

    return frame_detections
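
All of these examples lean on a `Detection` class that is never shown (along with helpers such as `Frame` and `IDGenerator`). A minimal sketch of what `Detection` presumably looks like, inferred from how the examples call it; the `iou` body is an assumption, and note that some later examples index `top_left` in (row, column) order instead of (x, y):

# Minimal sketch of the assumed Detection class; not part of the original code.
class Detection:
    def __init__(self, id, label, top_left, width, height, confidence=1.0):
        self.id = id                # track id, or -1 / '' when unassigned
        self.label = label          # class name, e.g. 'car'
        self.top_left = top_left    # (x, y) of the top-left corner
        self.width = width
        self.height = height
        self.confidence = confidence

    def iou(self, other: 'Detection') -> float:
        # Standard intersection-over-union of two axis-aligned boxes (assumed).
        x1 = max(self.top_left[0], other.top_left[0])
        y1 = max(self.top_left[1], other.top_left[1])
        x2 = min(self.top_left[0] + self.width, other.top_left[0] + other.width)
        y2 = min(self.top_left[1] + self.height, other.top_left[1] + other.height)
        inter = max(0, x2 - x1) * max(0, y2 - y1)
        union = self.width * self.height + other.width * other.height - inter
        return inter / union if union > 0 else 0.0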
Example #2
def _find_id(self, detection: Detection, dets_old: List[Detection]) -> None:
    # Reuse the id of the first previous detection that overlaps enough.
    if self.prev_det is None:
        return
    for detection2 in dets_old:
        if detection.iou(detection2) > INTERSECTION_THRESHOLD:
            detection.id = detection2.id
            break
Example #3
def _find_id(det_new: Detection, dets_old: List[Detection], im2, debug: bool = False) -> None:
    for det in dets_old:
        if det_new.iou(det) > INTERSECTION_THRESHOLD:
            if debug:
                rect = patches.Rectangle((det.top_left[1], det.top_left[0]), det.height, det.width,
                                         linewidth=1, edgecolor='blue', facecolor='none')
                plt.gca().add_patch(rect)
                rect = patches.Rectangle((det_new.top_left[1], det_new.top_left[0]), det_new.height, det_new.width,
                                         linewidth=1, edgecolor='red', facecolor='none')
                plt.gca().add_patch(rect)
            det_new.id = det.id
            break
Example #4
    def __call__(self, frame: Frame, siamese: SiameseDB, debug=False, plot_number=False) -> None:
        self.debug = debug
        det1_flow = []
        if self.prev_img is not None:
            flow = self._optical_flow(frame.image)
            if debug:
                show_optical_flow_arrows(frame.image, flow)
            for det in self.prev_det:
                det_flow = flow[det.top_left[1]:det.top_left[1] + det.height,
                                det.top_left[0]:det.top_left[0] + det.width, :]
                accum_flow = (0, 0)
                # Average the flow over pixels with non-zero motion inside the box.
                non_zero_values = det_flow[np.logical_or(det_flow[:, :, 0] != 0, det_flow[:, :, 1] != 0), :]
                if non_zero_values.size > 0:
                    accum_flow = np.mean(non_zero_values, axis=0)
                det1_flow.append(
                    Detection(det.id, det.label,
                              (int(det.top_left[0] + accum_flow[1]), int(det.top_left[1] + accum_flow[0])),
                              det.width, det.height))

        for detection in frame.detections:
            self._find_id(detection, det1_flow)
            if detection.id == -1:
                if siamese is not None:
                    new_id = siamese.query(frame.image, detection)
                    if new_id != -1:
                        detection.id = new_id
                    else:
                        detection.id = IDGenerator.next()
                else:
                    detection.id = IDGenerator.next()
        self.prev_det = frame.detections
        self.prev_img = frame.image

        if debug:
            self.plot_tracking_color(frame, plot_number)
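
The trackers above assign fresh ids through an `IDGenerator` that is not shown. A minimal sketch, assuming it is nothing more than a global counter:

# Assumed implementation of IDGenerator; not the original code.
import itertools

class IDGenerator:
    _counter = itertools.count(0)

    @staticmethod
    def next() -> int:
        # Return a globally unique, monotonically increasing id.
        return next(IDGenerator._counter)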
Example #5
def read_annotations(file_path: str, frames: int = 2140) -> List[List[Detection]]:
    # Parses a CVAT-style XML annotation file: one <track> per object id,
    # with one <box> per frame in which that object appears.
    frames_detections = []

    root = ET.parse(file_path).getroot()
    tracks = root.findall('track')

    for i in range(frames + 1):  # frame ids run from 0 to `frames`, inclusive
        frame_detections = []
        for track in tracks:
            id_value = int(track.attrib["id"])
            label = track.attrib["label"]
            if label == 'bike':
                label = 'bicycle'
            box = track.find('box[@frame="{}"]'.format(i))
            if box is not None:
                xtl = int(float(box.attrib["xtl"]))
                ytl = int(float(box.attrib["ytl"]))
                xbr = int(float(box.attrib["xbr"]))
                ybr = int(float(box.attrib["ybr"]))

                frame_detections.append(Detection(id_value, label, (xtl, ytl), xbr - xtl + 1, ybr - ytl + 1))

        frames_detections.append(frame_detections)

    return frames_detections
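
A quick usage sketch, pointing at the same annotation file that Examples #10 and #11 load; the printed summary is illustrative only:

# Usage sketch; assumes the annotation file referenced in Examples #10 and #11.
gt = read_annotations('../datasets/AICity_data/train/S03/c010/m6-full_annotation.xml')
print(len(gt), 'frames;', sum(len(dets) for dets in gt), 'boxes in total')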
Example #6
def read_detections(path: str) -> List[List[Detection]]:
    # [frame, -1, left, top, width, height, conf, -1, -1, -1]
    frame_detections = []
    with open(path) as f:
        for line in f.readlines():
            parts = line.split(',')

            frame_id = int(parts[0])
            # MOT frame ids are 1-based; grow the list until it covers this frame.
            while frame_id > len(frame_detections):
                frame_detections.append([])

            tl_x = int(float(parts[2]))
            tl_y = int(float(parts[3]))
            width = int(float(parts[4]))
            height = int(float(parts[5]))
            confidence = float(parts[6])

            frame_detections[-1].append(
                Detection(int(parts[1]),
                          'car', (tl_x, tl_y),
                          width,
                          height,
                          confidence=confidence))

    return frame_detections
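
For reference, a hedged usage sketch; the file path is hypothetical, but each line follows the MOT layout documented in the comment at the top of the function:

# Hypothetical path; one comma-separated MOT line per detection, e.g.:
#   1,-1,882.3,210.5,55.0,42.1,0.92,-1,-1,-1
detections = read_detections('../datasets/AICity_data/train/S03/c010/det/det.txt')
print('frame 1 has', len(detections[0]), 'detections')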
Example #7
def overlap_flow_tracking(optical_flow_method,
                          im1: np.ndarray, det1: List[Detection],
                          im2: np.ndarray, det2: List[Detection],
                          debug: bool = False,
                          mot=None, gt1=None, count=0):
    # Shi-Tomasi corner parameters for cv2.goodFeaturesToTrack.
    feature_params = dict(maxCorners=500,
                          qualityLevel=0.3,
                          minDistance=7,
                          blockSize=7)
    det1_flow = []
    if im1 is not None:
        # Mask over the previous detections; note this example indexes top_left
        # as (row, col), with width along rows and height along columns.
        mask = np.zeros((im1.shape[0], im1.shape[1]), dtype=np.uint8)
        for det in det1:
            mask[det.top_left[0]:det.top_left[0] + det.width, det.top_left[1]:det.top_left[1] + det.height] = 255

        p0 = cv2.goodFeaturesToTrack(cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY), mask=mask, **feature_params)
        flow = optical_flow_method(im1, im2, p0)

        for det in det1:
            det_flow = flow[det.top_left[0]:det.top_left[0] + det.width,
                            det.top_left[1]:det.top_left[1] + det.height, :]
            # Average the flow over pixels with non-zero motion; fall back to no motion.
            accum_flow = np.mean(det_flow[np.logical_or(det_flow[:, :, 0] != 0, det_flow[:, :, 1] != 0), :], axis=0)
            if np.isnan(accum_flow).any():
                accum_flow = (0, 0)
            det1_flow.append(
                Detection(det.id, det.label,
                          (int(det.top_left[0] + accum_flow[1]), int(det.top_left[1] + accum_flow[0])),
                          det.width, det.height))

    if debug:
        plt.figure(figsize=(8, 3))
        plt.subplot(1, 2, 2)
    for det in det2:
        if im1 is not None:
            _find_id(det, det1_flow, im2, debug=debug)

        if det.id == -1:
            det.id = IDGenerator.next()

    if debug:
        plt.imshow(cv2.cvtColor(im2, cv2.COLOR_BGR2RGB))
        plt.axis('off')

        plt.subplot(1, 2, 1)
        if det1 is not None:
            for det in det1:
                rect = patches.Rectangle((det.top_left[1], det.top_left[0]), det.height, det.width,
                                         linewidth=1, edgecolor='blue', facecolor='none')
                plt.gca().add_patch(rect)
            plt.imshow(cv2.cvtColor(im1, cv2.COLOR_BGR2RGB))
            plt.axis('off')

            plt.savefig('../video/tracking/{:04d}'.format(count))

        plt.close()

    if mot is not None and gt1 is not None:
        mot.update(det1_flow, gt1)
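
`overlap_flow_tracking` slices `flow` per box, so `optical_flow_method` is expected to return a dense (H, W, 2) flow field; a dense method can simply ignore the sparse corners `p0`. A minimal sketch using OpenCV's Farneback flow; the wrapper itself is an assumption, not the original method:

import cv2
import numpy as np

def farneback_flow(im1: np.ndarray, im2: np.ndarray, p0=None) -> np.ndarray:
    # Assumed wrapper: returns a dense (H, W, 2) field of (dx, dy) per pixel.
    # p0 is accepted only to match the expected signature and is unused here.
    g1 = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
    g2 = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)
    return cv2.calcOpticalFlowFarneback(g1, g2, None, 0.5, 3, 15, 3, 5, 1.2, 0)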
Example #8
def main():
    im_1440 = cv2.imread(
        "../datasets/AICity_data_S03_c010_1440/frame_1440.jpg")
    top_left = [995, 410]
    width = 1241 - 995
    height = 605 - 410

    ground_truth = [Detection('', 'car', top_left, width, height)]
    """
        DETECTIONS FROM ALTERED GROUND TRUTH 
    """
    frame = Frame(0, ground_truth)
    frame.detections = alter_detections(ground_truth)

    plot_frame(im_1440, frame)
    iou = frame.get_detection_iou()
    iou_mean = frame.get_detection_iou_mean()
    print("IOU: ", iou, "IOU mean", iou_mean)
Example #9
def read_annotations(root_directory: str, start: int, end: int) -> List[List[Detection]]:
    # Parses Pascal VOC-style per-frame XML files named frame_0000.xml, frame_0001.xml, ...
    frames_detections = []

    for i in range(start, end + 1):
        frame_path = 'frame_{:04d}.xml'.format(i)
        root = ET.parse(os.path.join(root_directory, frame_path)).getroot()

        frame_detections = []

        for obj in root.findall('object'):
            box = obj.find('bndbox')

            label = obj.find('name').text
            xmin = int(box.find('xmin').text)
            ymin = int(box.find('ymin').text)
            xmax = int(box.find('xmax').text)
            ymax = int(box.find('ymax').text)

            frame_detections.append(Detection('', label, (xmin, ymin), xmax - xmin + 1, ymax - ymin + 1))

        frames_detections.append(frame_detections)

    return frames_detections
Example #10
def off_the_shelf_yolo(tracking, debug=False, *args, **kwargs):
    video = Video("../datasets/AICity_data/train/S03/c010/frames")
    detection_transform = DetectionTransform()
    classes = utils.load_classes('../config/coco.names')
    gt = read_annotations(
        '../datasets/AICity_data/train/S03/c010/m6-full_annotation.xml')

    model = Darknet('../config/yolov3.cfg')
    model.load_weights('../weights/fine_tuned_yolo_freeze.weights')
    if torch.cuda.is_available():
        model = model.cuda()

    frames = []
    last_im = None

    model.eval()
    with torch.no_grad():
        for i, im in tqdm(enumerate(video.get_frames(start=len(video) // 4)),
                          total=len(video) - len(video) // 4,  # only 3/4 of the video is iterated
                          file=sys.stdout,
                          desc='Yolo'):
            im_tensor = detection_transform(im)

            im_tensor = im_tensor.view((-1, ) + im_tensor.size())
            if torch.cuda.is_available():
                im_tensor = im_tensor.cuda()

            detections = model.forward(im_tensor)
            detections = utils.non_max_suppression(detections,
                                                   80,
                                                   conf_thres=.6,
                                                   nms_thres=0.3)

            frame = Frame(i + (len(video) // 4))
            frame.ground_truth = gt[frame.id]

            # NMS may return None for an image with no surviving detections.
            if detections[0] is not None:
                for d in detections[0]:
                    if int(d[6]) in VALID_LABELS:
                        bbox = d.cpu().numpy()
                        det = Detection(-1,
                                        classes[int(d[6])], (bbox[0], bbox[1]),
                                        width=bbox[2] - bbox[0],
                                        height=bbox[3] - bbox[1],
                                        confidence=d[5])
                        detection_transform.unshrink_detection(det)
                        frame.detections.append(det)

            if tracking is not None:
                last_frame = None if len(frames) == 0 else frames[-1]
                tracking(frame=frame,
                         im=im,
                         last_frame=last_frame,
                         last_im=last_im,
                         frames=frames,
                         debug=False)

            frames.append(frame)

            last_im = im

            if debug:
                plt.figure()
                for det in frame.detections:
                    rect = patches.Rectangle(det.top_left,
                                             det.width,
                                             det.height,
                                             linewidth=2,
                                             edgecolor='blue',
                                             facecolor='none')
                    plt.gca().add_patch(rect)
                    if tracking is None:
                        text = '{}'.format(det.label)
                    else:
                        text = '{} ~ {}'.format(det.label, det.id)
                    plt.text(det.top_left[0],
                             det.top_left[1],
                             s=text,
                             color='white',
                             verticalalignment='top',
                             bbox={
                                 'color': 'blue',
                                 'pad': 0
                             })
                plt.imshow(im)
                plt.axis('off')
                # plt.savefig('../video/video_yolo_fine_tune_good/frame_{:04d}'.format(i))
                plt.show()
                plt.close()
        # iou_over_time(frames)
        mAP = mean_average_precision(frames)
        print("YOLO mAP:", mAP)
Example #11
def off_the_shelf_ssd(tracking, debug=False, **kwargs):
    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    gt = read_annotations(
        '../datasets/AICity_data/train/S03/c010/m6-full_annotation.xml')
    video = Video("../datasets/AICity_data/train/S03/c010/frames")
    trans = transforms.Compose(
        [transforms.Resize((300, 300)),
         transforms.ToTensor()])

    labels = (  # VOC class names; index 0 of the network output is background
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
        'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')

    model = build_ssd('test', 300, 21)  # initialize SSD
    model.load_weights('../weights/ssd300_mAP_77.43_v2.pth')
    if torch.cuda.is_available():
        model = model.cuda()

    frames = []

    model.eval()
    with torch.no_grad():
        for i, im in enumerate(video.get_frames()):

            im_tensor = trans(im)
            im_tensor = im_tensor.view((-1, ) + im_tensor.size())
            if torch.cuda.is_available():
                im_tensor = im_tensor.cuda()

            output = model.forward(im_tensor)
            detections = output.data

            w = im.width
            h = im.height
            frame = Frame(i)

            frame.ground_truth = gt[frame.id]

            # class indices 2, 6, 7, 14 = bicycle, bus, car, motorbike (0 is background)
            for j in (2, 6, 7, 14):
                dets = detections[0, j, :]
                mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, 5)
                if dets.size(0) == 0:
                    continue
                boxes = dets[:, 1:]
                scores = dets[:, 0].cpu().numpy()
                cls_dets = np.hstack((boxes.cpu().numpy(),
                                      scores[:,
                                             np.newaxis])).astype(np.float32,
                                                                  copy=False)
                for cls_det in cls_dets:
                    x1 = int(w * cls_det[0])
                    y1 = int(h * cls_det[1])
                    det = Detection(-1,
                                    labels[j - 1], (x1, y1),
                                    width=w * (cls_det[2] - cls_det[0]),
                                    height=h * (cls_det[3] - cls_det[1]),
                                    confidence=cls_det[4])
                    frame.detections.append(det)

            # kalman(frame)
            if tracking is not None:
                tracking(frame, frames, debug=debug)
            frames.append(frame)

            if debug:
                plt.figure()
                for det in frame.detections:
                    rect = patches.Rectangle(det.top_left,
                                             det.width,
                                             det.height,
                                             linewidth=2,
                                             edgecolor='blue',
                                             facecolor='none')
                    plt.gca().add_patch(rect)
                    plt.text(det.top_left[0],
                             det.top_left[1],
                             s='{} ~ {}'.format(det.label, det.id),
                             color='white',
                             verticalalignment='top',
                             bbox={
                                 'color': 'blue',
                                 'pad': 0
                             })
                plt.imshow(im)
                plt.axis('off')
                # plt.savefig('../video/video_ssd_KalmanID/frame_{:04d}'.format(i))
                plt.show()
                plt.close()

        # iou_over_time(frames)
        mAP = mean_average_precision(frames)
        print("SSD mAP:", mAP)
Example #12
def unshrink_detection(self, det: Detection) -> None:
    # Map a detection from network-input coordinates back to the original image.
    top_left = (int(det.top_left[0] * self.scale - self.pad[0]),
                int(det.top_left[1] * self.scale - self.pad[1]))
    det.top_left = top_left
    det.width = int(det.width * self.scale)
    det.height = int(det.height * self.scale)
Example #13
def shrink_detection(self, det: Detection) -> None:
    # Inverse of unshrink_detection: map original-image coordinates into the network input.
    top_left = (int((det.top_left[0] + self.pad[0]) / self.scale),
                int((det.top_left[1] + self.pad[1]) / self.scale))
    det.top_left = top_left
    det.width = int(det.width / self.scale)
    det.height = int(det.height / self.scale)
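
Both methods belong to the `DetectionTransform` used in Example #10, but `self.scale` and `self.pad` are never shown being initialised. A minimal sketch of how they might be set up for a letterboxed square network input; the constructor and its formula are assumptions:

# Assumed state for DetectionTransform; not the original constructor.
class DetectionTransform:
    def __init__(self, image_size=(1080, 1920), input_size=416):
        h, w = image_size
        # Scale from the square network input back to original-image pixels.
        self.scale = max(h, w) / input_size
        # Letterbox padding (x, y) that centres the image in the square canvas,
        # expressed in original-image pixels.
        self.pad = ((max(h, w) - w) // 2, (max(h, w) - h) // 2)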
Example #14
def _find_id(self, detection: Detection, frame_list: List[Frame]) -> None:
    # Walk backwards through up to `self.look_back` previous frames and reuse
    # the id of the first detection that overlaps enough.
    for i in range(-1, max(-self.look_back, -len(frame_list)) - 1, -1):
        for detection2 in frame_list[i].detections:
            if detection.iou(detection2) > INTERSECTION_THRESHOLD:
                detection.id = detection2.id
                return