예제 #1
0
def _read_detections(lines, cnn_f, tsn_f):
    """Build per-frame detections from box lines and feature streams.

    Parameters
    ----------
    lines : Sequence[str]
        One entry per frame of the form ``"<image_filename> <json>"``. The
        JSON part is a list of dicts with a ``'box'`` and a ``'score'``
        entry. Box values are read as (x0, y0, x1, y1) and converted to
        (x0, y0, x1 - x0, y1 - y0).
    cnn_f : file-like
        Stream that yields one JSON line per frame: a list of dicts with a
        ``'features'`` entry, indexed in the same order as the boxes.
    tsn_f : Optional[file-like]
        Stream that yields one JSON line per frame: a list of dicts with a
        ``'before_fc_features'`` entry. Pass None to leave the
        ``tsn_feature`` attribute unset.

    Returns
    -------
    Dict[int, List[pymotutils.RegionOfInterestDetection]]
        Maps every index in ``range(len(lines))`` to the detections of that
        frame. Frame 0 always maps to an empty list. Each detection carries
        a ``feature`` attribute and, if `tsn_f` is given, a ``tsn_feature``
        attribute.

    """
    # see motchallenge_devkit.py create_data_source
    detections = {i: [] for i in range(len(lines))}
    for frame_idx, line in enumerate(lines):
        # Split at the first space only: the JSON payload may itself contain
        # spaces (e.g. with the default ``json.dumps`` separators). The image
        # filename in front of it is not used.
        _, boxes_json = line.split(' ', 1)
        box_list = json.loads(boxes_json)

        # One line is consumed from each feature stream for every frame,
        # including the skipped first frame, so the streams stay aligned
        # with `lines`.
        cnn_features = np.array(
            [entry['features'] for entry in json.loads(cnn_f.readline())])

        tsn_features = None
        if tsn_f is not None:
            tsn_features = np.array(
                [entry['before_fc_features']
                 for entry in json.loads(tsn_f.readline())])

        # For flow / fusion there is no data for the first frame, so skip it.
        if frame_idx == 0:
            continue

        corners = np.array([entry['box'] for entry in box_list])
        rois = np.array([(box[0], box[1], box[2] - box[0], box[3] - box[1])
                         for box in corners],
                        dtype=float)
        box_scores = np.array([entry['score'] for entry in box_list],
                              dtype=float)

        for i, (roi, score) in enumerate(zip(rois, box_scores)):
            detection = pymotutils.RegionOfInterestDetection(frame_idx,
                                                             roi,
                                                             score,
                                                             xyz=None)
            # see pymotutils.py compute_features
            detection.feature = cnn_features[i]
            if tsn_features is not None:
                detection.tsn_feature = tsn_features[i]
            detections[frame_idx].append(detection)
    return detections
예제 #2
0
def read_detections(filename, min_confidence=None):
    """Read detection file.

    Parameters
    ----------
    filename : str
        Path to the detection file (anything accepted by ``np.loadtxt``).
    min_confidence : Optional[float]
        A detector confidence threshold. Detections with lower confidence are
        disregarded.

    Returns
    -------
    Dict[int, List[pymotutils.RegionOfInterestDetection]]
        This function returns a dictionary that maps frame indices to a list
        of detections in that frame. Every frame index between the smallest
        and the largest one found in the file has an entry (possibly an empty
        list). A file without data rows yields an empty dictionary.

    """
    # Comma-separated columns as indexed below:
    # 0 = frame id, 2-5 = bbox (x, y, w, h), 6 = confidence.
    # Column 1 is not read.
    #
    # ndmin=2 keeps the array two-dimensional when the file holds a single
    # row; otherwise np.loadtxt returns a 1-D array and the column indexing
    # below fails.
    data = np.loadtxt(filename, delimiter=',', ndmin=2)
    if data.size == 0:
        return {}

    min_frame_idx = int(data[:, 0].min())
    max_frame_idx = int(data[:, 0].max())
    detections = {i: [] for i in range(min_frame_idx, max_frame_idx + 1)}
    for row in data:
        confidence = row[6]
        if min_confidence is not None and confidence < min_confidence:
            continue
        frame_idx, roi = int(row[0]), row[2:6]
        detections[frame_idx].append(
            pymotutils.RegionOfInterestDetection(frame_idx, roi, confidence))
    return detections
예제 #3
0
def read_cvml_groundtruth(filename, projection_matrix):
    """Parse a CVML ground truth XML file into a track set.

    Parameters
    ----------
    filename : str
        Path to the XML file (anything accepted by ``minidom.parse``).
    projection_matrix : array_like
        Projection matrix that is written into the upper 3x4 part of a 4x4
        identity matrix, which is then inverted.

    Returns
    -------
    pymotutils.TrackSet
        One track per distinct ``id`` attribute of the ``object`` elements.
        Each detection stores the region of interest as (left, top, w, h)
        and, as `xyz`, the first result that ``intersect_with_ground_plane``
        returns for the bottom center of the box.

    """
    def float_attribute(element, key):
        return float(element.attributes[key].value)

    # Embed the projection into a square matrix so that it can be inverted.
    homogeneous_projection = np.eye(4)
    homogeneous_projection[:3, :4] = projection_matrix
    inverse_projection = np.linalg.inv(homogeneous_projection)

    document = minidom.parse(filename)
    track_set = pymotutils.TrackSet()
    for frame_node in document.getElementsByTagName("frame"):
        frame_idx = int(frame_node.attributes["number"].value)
        for object_node in frame_node.getElementsByTagName("object"):
            box_node = object_node.getElementsByTagName("box")[0]
            center_x = float_attribute(box_node, "xc")
            center_y = float_attribute(box_node, "yc")
            width = float_attribute(box_node, "w")
            height = float_attribute(box_node, "h")

            # The file stores box centers; the ROI uses the top-left corner.
            roi = (center_x - width / 2., center_y - height / 2.,
                   width, height)

            # The bottom center of the box is handed to the ground plane
            # intersection.
            bottom_center = np.array([[center_x, center_y + height / 2.]])
            positions, isvalid = intersect_with_ground_plane(
                inverse_projection, GROUND_PLANE_NORMAL,
                GROUND_PLANE_DISTANCE, bottom_center)
            assert isvalid[0], "Failed to compute ground plane projection"

            track_id = int(object_node.attributes["id"].value)
            if track_id not in track_set.tracks:
                track_set.create_track(track_id)
            detection = pymotutils.RegionOfInterestDetection(
                frame_idx, roi, xyz=positions[0])
            track_set.tracks[track_id].add(detection)
    return track_set
예제 #4
0
def read_detections(filename,
                    object_classes=None,
                    min_height=MIN_OBJECT_HEIGHT_IN_PIXELS,
                    min_confidence=-np.inf):
    """Read detections from a space-separated text file.

    Parameters
    ----------
    filename : str
        Path to the detection file.
    object_classes : Optional[Container[str]]
        Object types (third column) to keep. If None, detections of every
        type are kept.
    min_height : float
        Detections whose bounding box height in pixels (computed as
        bottom - top + 1) is lower than this value are disregarded.
    min_confidence : float
        Detections with a score lower than this value are disregarded.

    Returns
    -------
    Dict[int, List[pymotutils.RegionOfInterestDetection]]
        Maps frame indices to the list of detections kept for that frame.
        Frames without any kept detection have no entry.

    Raises
    ------
    AssertionError
        If a line does not consist of exactly 18 space-separated elements.

    File format:

    #Values    Name      Description
    ----------------------------------------------------------------------------
       1    frame        Frame within the sequence where the object appears
       1    track id     IGNORED
       1    type         Describes the type of object: 'Car', 'Van', 'Truck',
                         'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
                         'Misc' or 'DontCare'
       1    truncated    IGNORED
       1    occluded     IGNORED
       1    alpha        IGNORED
       4    bbox         2D bounding box of object in the image (0-based index):
                         contains left, top, right, bottom pixel coordinates
       3    dimensions   IGNORED
       3    location     IGNORED
       1    rotation_y   IGNORED
       1    score        Float, indicating confidence in detection, higher is
                         better.

    """
    with open(filename, "r") as f:
        lines = f.read().splitlines()

    detections = {}
    for line in lines:
        words = line.strip().split(' ')
        assert len(words) == 18, "Invalid number of elements in line."
        # With the default object_classes=None there is nothing to filter
        # on; a membership test against None would raise a TypeError.
        if object_classes is not None and words[2] not in object_classes:
            continue
        frame_idx = int(words[0])
        roi = np.asarray([float(x) for x in words[6:10]])
        # Convert (left, top, right, bottom) to (x, y, w, h). The +1 counts
        # both border pixels as part of the box.
        roi[2:] -= roi[:2] - 1
        if roi[3] < min_height:
            continue
        confidence = float(words[17])
        if confidence < min_confidence:
            continue
        detections.setdefault(frame_idx, []).append(
            pymotutils.RegionOfInterestDetection(frame_idx, roi, confidence))

    return detections
예제 #5
0
def read_cvml_detections(filename,
                         projection_matrix,
                         roi_scale_w=0.75,
                         roi_scale_h=1.0):
    """Parse a CVML detection XML file into per-frame detection lists.

    Parameters
    ----------
    filename : str
        Path to the XML file (anything accepted by ``minidom.parse``).
    projection_matrix : array_like
        Projection matrix that is written into the upper 3x4 part of a 4x4
        identity matrix, which is then inverted.
    roi_scale_w : float
        Factor applied to the box width. The box is shrunk or grown around
        its center.
    roi_scale_h : float
        Factor applied to the box height. The box is shrunk or grown around
        its center.

    Returns
    -------
    Dict[int, List[pymotutils.RegionOfInterestDetection]]
        Maps the ``number`` attribute of every ``frame`` element to the list
        of detections built from its ``object`` elements.

    """
    def float_attribute(element, key):
        return float(element.attributes[key].value)

    # Embed the projection into a square matrix so that it can be inverted.
    homogeneous_projection = np.eye(4)
    homogeneous_projection[:3, :4] = projection_matrix
    inverse_projection = np.linalg.inv(homogeneous_projection)

    document = minidom.parse(filename)
    detections = {}
    for frame_node in document.getElementsByTagName("frame"):
        frame_idx = int(frame_node.attributes["number"].value)
        frame_detections = []
        detections[frame_idx] = frame_detections
        for object_node in frame_node.getElementsByTagName("object"):
            box_node = object_node.getElementsByTagName("box")[0]
            center_x = float_attribute(box_node, "xc")
            center_y = float_attribute(box_node, "yc")
            width = float_attribute(box_node, "w")
            height = float_attribute(box_node, "h")

            # The file stores box centers; start from the top-left corner
            # and then rescale the box around its center.
            left = center_x - width / 2.
            top = center_y - height / 2.
            scaled_width = roi_scale_w * width
            scaled_height = roi_scale_h * height
            left += (width - scaled_width) / 2
            top += (height - scaled_height) / 2
            roi = (left, top, scaled_width, scaled_height)

            confidence = float_attribute(object_node, "confidence")

            # The bottom center of the unscaled box is handed to the ground
            # plane intersection.
            bottom_center = np.array([[center_x, center_y + height / 2.]])
            positions, isvalid = intersect_with_ground_plane(
                inverse_projection, GROUND_PLANE_NORMAL,
                GROUND_PLANE_DISTANCE, bottom_center)
            assert isvalid[0], "Failed to compute ground plane projection"

            frame_detections.append(
                pymotutils.RegionOfInterestDetection(frame_idx,
                                                     np.asarray(roi),
                                                     confidence,
                                                     positions[0]))
    return detections
예제 #6
0
def read_detections(filename, min_confidence=None):
    """Read detection file.

    Parameters
    ----------
    filename : str
        Path to the detection file (anything accepted by ``np.loadtxt``).
    min_confidence : Optional[float]
        A detector confidence threshold. Detections with lower confidence are
        disregarded.

    Returns
    -------
    Dict[int, List[pymotutils.RegionOfInterestDetection]]
        This function returns a dictionary that maps frame indices to a list
        of detections in that frame. Every frame index between the smallest
        and the largest one found in the file has an entry (possibly an empty
        list). If any value in columns 7-9 differs from -1, these columns are
        passed to each detection as `xyz`; otherwise `xyz` is None. A file
        without data rows yields an empty dictionary.

    """
    # format: frame id, track id, bbox (x, y, w, h), confidence, world (x, y, z)
    # track id is always -1
    #
    # ndmin=2 keeps the array two-dimensional when the file holds a single
    # row; otherwise np.loadtxt returns a 1-D array and the column indexing
    # below fails.
    data = np.loadtxt(filename, delimiter=',', ndmin=2)
    if data.size == 0:
        return {}

    has_threed = np.any(data[:, 7:10] != -1)
    min_frame_idx = int(data[:, 0].min())
    max_frame_idx = int(data[:, 0].max())
    detections = {i: [] for i in range(min_frame_idx, max_frame_idx + 1)}
    for row in data:
        confidence = row[6]
        if min_confidence is not None and confidence < min_confidence:
            continue
        frame_idx, roi = int(row[0]), row[2:6]
        xyz = row[7:10] if has_threed else None
        detections[frame_idx].append(
            pymotutils.RegionOfInterestDetection(frame_idx,
                                                 roi,
                                                 confidence,
                                                 xyz=xyz))
    return detections
def generate_detections(index_to_bgr_filenames,
                        inference_graph_pb,
                        class_to_name,
                        min_confidence=0.5,
                        max_bbox_overlap=0.7,
                        verbose=False):
    """Generate detections from list of image filenames.

    Parameters
    ----------
    index_to_bgr_filenames: Dict[int, str]
        Maps from frame index to image filename. The frame index is used to
        populate the RegionOfInterestDetection.frame_idx attribute. Frames
        are processed in ascending order of their index.
    inference_graph_pb : str
        Path to the frozen_inference_graph.pb file. This file is contained in
        the model archive.
    class_to_name : Dict[int, str]
        A dictionary that maps from label to class name. Classes that are not
        contained in the dictionary are suppressed. Use MSCOCO_LABELMAP for
        networks trained on MSCOCO.
    min_confidence : float
        Minimum detector confidence in [0, 1]. Detections with confidence
        lower than this value are suppressed.
    max_bbox_overlap : float
        Non-maxima suppression threshold in [0, 1], forwarded unchanged to
        ``Detector.run``.
        NOTE(review): how larger values affect the number of returned
        detections depends on ``Detector.run`` -- confirm there.
    verbose : bool
        If True, prints status information about the number of processed frames
        to standard output.

    Returns
    -------
    Dict[int, List[pymotutils.RegionOfInterestDetection]]
        Returns a dictionary that maps from frame index to list of detections,
        in the order in which the detector returned them.

    """
    detector = Detector(inference_graph_pb)
    detections = {}

    num_frames = len(index_to_bgr_filenames)
    frames = sorted(index_to_bgr_filenames.items())
    for num_processed, (frame_idx, filename) in enumerate(frames):
        if verbose:
            print("Processing detection on frame %d out of %d" %
                  (num_processed, num_frames))
        bgr_image = cv2.imread(filename, cv2.IMREAD_COLOR)
        boxes, scores, classes = detector.run(bgr_image, min_confidence,
                                              max_bbox_overlap)

        # Drop every detection whose class label has no name assigned.
        keep = [i for i in range(len(boxes)) if classes[i] in class_to_name]
        boxes, scores, classes = boxes[keep], scores[keep], classes[keep]
        class_names = [class_to_name[x] for x in classes]

        # Build a list (not a set): the documented return type is a list, a
        # set would lose the detector's ordering and would require the
        # detection objects to be hashable.
        detections[frame_idx] = [
            pymotutils.RegionOfInterestDetection(frame_idx,
                                                 box,
                                                 score,
                                                 class_label=class_label,
                                                 class_name=class_name)
            for box, score, class_label, class_name in zip(
                boxes, scores, classes, class_names)
        ]
    return detections