import json
from xml.dom import minidom

import cv2
import numpy as np

import pymotutils

# The following names are assumed to be defined elsewhere in this package:
# Detector, MSCOCO_LABELMAP, MIN_OBJECT_HEIGHT_IN_PIXELS, GROUND_PLANE_NORMAL,
# GROUND_PLANE_DISTANCE, and intersect_with_ground_plane.


def _read_detections(lines, cnn_f, tsn_f):
    # See motchallenge_devkit.py, create_data_source.
    detections = {i: [] for i in range(len(lines))}
    for frame_idx, line in enumerate(lines):
        # Each line is "image_filename boxes_json"; the image filename is
        # unused. Split only on the first space so that spaces inside the
        # JSON string do not break the unpacking.
        _, boxes = line.split(' ', 1)
        box_list = json.loads(boxes)
        cnn = json.loads(cnn_f.readline())
        cnn_features = np.array([dic['features'] for dic in cnn])
        if tsn_f is not None:
            tsn = json.loads(tsn_f.readline())
            tsn_features = np.array(
                [dic['before_fc_features'] for dic in tsn])
        # In the flow and fusion cases there is no data for the first frame,
        # so skip it.
        if frame_idx == 0:
            continue
        boxes = np.array([dic['box'] for dic in box_list])
        # Convert from (x1, y1, x2, y2) to (x, y, w, h).
        boxes = np.array(
            [(box[0], box[1], box[2] - box[0], box[3] - box[1])
             for box in boxes], dtype=float)
        box_scores = np.array([dic['score'] for dic in box_list], dtype=float)
        for i in range(len(boxes)):
            detection = pymotutils.RegionOfInterestDetection(
                frame_idx, boxes[i], box_scores[i], xyz=None)
            # See pymotutils.py, compute_features.
            detection.feature = cnn_features[i]
            if tsn_f is not None:
                detection.tsn_feature = tsn_features[i]
            detections[frame_idx].append(detection)
    return detections
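

def _demo_read_feature_detections():
    # A hedged usage sketch of _read_detections; the file names below are
    # hypothetical. `lines` holds one "image_filename boxes_json" entry per
    # frame, while the feature files contain one JSON array per frame,
    # aligned line-by-line with `lines`.
    with open("detections.txt") as det_f, \
            open("cnn_features.json") as cnn_f, \
            open("tsn_features.json") as tsn_f:
        return _read_detections(det_f.read().splitlines(), cnn_f, tsn_f)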


def read_detections(filename, min_confidence=None):
    """Read detection file.

    Parameters
    ----------
    filename : str
        Path to the detection file.
    min_confidence : Optional[float]
        A detector confidence threshold. Detections with lower confidence
        are disregarded.

    Returns
    -------
    Dict[int, List[MonoDetection]]
        This function returns a dictionary that maps frame indices to a
        list of detections in that frame.

    """
    # File format: frame index, track id, bbox (x, y, w, h), confidence.
    data = np.loadtxt(filename, delimiter=',')
    min_frame_idx = int(data[:, 0].min())
    max_frame_idx = int(data[:, 0].max())
    detections = {i: [] for i in range(min_frame_idx, max_frame_idx + 1)}
    for row in data:
        confidence = row[6]
        if min_confidence is not None and confidence < min_confidence:
            continue
        frame_idx, roi = int(row[0]), row[2:6]
        detections[frame_idx].append(
            pymotutils.RegionOfInterestDetection(frame_idx, roi, confidence))
    return detections
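

def _demo_read_motchallenge_detections():
    # A hedged usage sketch; the sequence path below follows the usual
    # MOTChallenge layout but is hypothetical, not shipped with this code.
    detections = read_detections("MOT16-02/det/det.txt", min_confidence=0.3)
    for frame_idx in sorted(detections):
        print("frame %d: %d detections" %
              (frame_idx, len(detections[frame_idx])))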


def read_cvml_groundtruth(filename, projection_matrix):
    """Read a CVML ground truth file into a pymotutils.TrackSet.

    The bottom-center of each bounding box is projected onto the ground
    plane to obtain a 3D position for the tracked object.
    """

    def fattr(node, name):
        return float(node.attributes[name].value)

    # Wrap the 3x4 projection matrix into an invertible 4x4 matrix.
    wrapped_projection_matrix = np.eye(4)
    wrapped_projection_matrix[:3, :4] = projection_matrix
    inv_projection_matrix = np.linalg.inv(wrapped_projection_matrix)

    xmldoc = minidom.parse(filename)
    track_set = pymotutils.TrackSet()
    for frame in xmldoc.getElementsByTagName("frame"):
        frame_idx = int(frame.attributes["number"].value)
        for obj in frame.getElementsByTagName("object"):
            box = obj.getElementsByTagName("box")[0]
            xc, yc = fattr(box, "xc"), fattr(box, "yc")
            w, h = fattr(box, "w"), fattr(box, "h")
            roi = xc - w / 2., yc - h / 2., w, h
            xyz, isvalid = intersect_with_ground_plane(
                inv_projection_matrix, GROUND_PLANE_NORMAL,
                GROUND_PLANE_DISTANCE, np.array([[xc, yc + h / 2.]]))
            assert isvalid[0], "Failed to compute ground plane projection"
            track_id = int(obj.attributes["id"].value)
            if track_id not in track_set.tracks:
                track_set.create_track(track_id)
            track_set.tracks[track_id].add(
                pymotutils.RegionOfInterestDetection(
                    frame_idx, roi, xyz=xyz[0]))
    return track_set
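

# intersect_with_ground_plane, GROUND_PLANE_NORMAL, and GROUND_PLANE_DISTANCE
# are provided elsewhere in this package. The sketch below illustrates what
# the intersection could look like; it is an assumption, not the actual
# implementation, and it takes the plane to satisfy dot(normal, x) = distance,
# which may differ from the convention used by the real helper.
def _intersect_with_ground_plane_sketch(inv_projection_matrix, plane_normal,
                                        plane_distance, points_2d):
    xyz = np.zeros((len(points_2d), 3))
    isvalid = np.zeros(len(points_2d), dtype=bool)
    for i, (u, v) in enumerate(points_2d):
        # Back-project the pixel at two projective depths (s=1 and s=2) to
        # obtain two points on the viewing ray; the wrapped 4x4 matrix maps
        # (u * s, v * s, s, 1) back to a homogeneous world point.
        p1 = inv_projection_matrix.dot(np.array([u, v, 1., 1.]))[:3]
        p2 = inv_projection_matrix.dot(np.array([2. * u, 2. * v, 2., 1.]))[:3]
        direction = p2 - p1
        denominator = np.dot(plane_normal, direction)
        if abs(denominator) < 1e-12:
            continue  # Ray is (numerically) parallel to the ground plane.
        t = (plane_distance - np.dot(plane_normal, p1)) / denominator
        xyz[i] = p1 + t * direction
        isvalid[i] = True
    return xyz, isvalid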


def read_detections(filename, object_classes=None,
                    min_height=MIN_OBJECT_HEIGHT_IN_PIXELS,
                    min_confidence=-np.inf):
    """Read a KITTI detection file.

    File format:

    #Values  Name        Description
    --------------------------------------------------------------------
    1        frame       Frame within the sequence where the object
                         appears
    1        track id    IGNORED
    1        type        Describes the type of object: 'Car', 'Van',
                         'Truck', 'Pedestrian', 'Person_sitting',
                         'Cyclist', 'Tram', 'Misc' or 'DontCare'
    1        truncated   IGNORED
    1        occluded    IGNORED
    1        alpha       IGNORED
    4        bbox        2D bounding box of object in the image (0-based
                         index): contains left, top, right, bottom pixel
                         coordinates
    3        dimensions  IGNORED
    3        location    IGNORED
    1        rotation_y  IGNORED
    1        score       Float, indicating confidence in detection;
                         higher is better.
    """
    with open(filename, "r") as f:
        lines = f.read().splitlines()

    detections = {}
    for line in lines:
        words = line.strip().split(' ')
        assert len(words) == 18, "Invalid number of elements in line."
        object_class = words[2]
        if object_classes is not None and object_class not in object_classes:
            continue
        frame_idx = int(words[0])
        # Convert (left, top, right, bottom) to (x, y, w, h).
        roi = np.asarray([float(x) for x in words[6:10]])
        roi[2:] -= roi[:2] - 1
        if roi[3] < min_height:
            continue
        confidence = float(words[17])
        if confidence < min_confidence:
            continue
        detections.setdefault(frame_idx, []).append(
            pymotutils.RegionOfInterestDetection(frame_idx, roi, confidence))
    return detections
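

def _demo_read_kitti_detections():
    # A hedged usage sketch; "0000.txt" stands in for a KITTI tracking
    # sequence file and is not part of this code.
    detections = read_detections(
        "0000.txt", object_classes={"Car", "Pedestrian"}, min_confidence=0.5)
    print("frames with detections:", len(detections))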


def read_cvml_detections(filename, projection_matrix, roi_scale_w=0.75,
                         roi_scale_h=1.0):
    """Read a CVML detection file.

    Bounding boxes are rescaled by the given factors (keeping the box
    center fixed) and projected onto the ground plane.
    """

    def fattr(node, name):
        return float(node.attributes[name].value)

    def rescale_roi(old_roi):
        # Shrink the box around its center by the given scale factors.
        x, y, w, h = old_roi
        new_w, new_h = roi_scale_w * w, roi_scale_h * h
        dw, dh = w - new_w, h - new_h
        x += dw / 2
        y += dh / 2
        return x, y, new_w, new_h

    # Wrap the 3x4 projection matrix into an invertible 4x4 matrix.
    wrapped_projection_matrix = np.eye(4)
    wrapped_projection_matrix[:3, :4] = projection_matrix
    inv_projection_matrix = np.linalg.inv(wrapped_projection_matrix)

    xmldoc = minidom.parse(filename)
    detections = {}
    for frame in xmldoc.getElementsByTagName("frame"):
        frame_idx = int(frame.attributes["number"].value)
        detections[frame_idx] = []
        for obj in frame.getElementsByTagName("object"):
            box = obj.getElementsByTagName("box")[0]
            xc, yc = fattr(box, "xc"), fattr(box, "yc")
            w, h = fattr(box, "w"), fattr(box, "h")
            roi = xc - w / 2., yc - h / 2., w, h
            roi = rescale_roi(roi)
            confidence = fattr(obj, "confidence")
            # Project the bottom-center of the (unscaled) box onto the
            # ground plane.
            xyz, isvalid = intersect_with_ground_plane(
                inv_projection_matrix, GROUND_PLANE_NORMAL,
                GROUND_PLANE_DISTANCE, np.array([[xc, yc + h / 2.]]))
            assert isvalid[0], "Failed to compute ground plane projection"
            detections[frame_idx].append(
                pymotutils.RegionOfInterestDetection(
                    frame_idx, np.asarray(roi), confidence, xyz[0]))
    return detections


def read_detections(filename, min_confidence=None):
    """Read detection file.

    Parameters
    ----------
    filename : str
        Path to the detection file.
    min_confidence : Optional[float]
        A detector confidence threshold. Detections with lower confidence
        are disregarded.

    Returns
    -------
    Dict[int, List[MonoDetection]]
        This function returns a dictionary that maps frame indices to a
        list of detections in that frame. If the detection file contains
        3D positions, these will be used as sensor_data. Otherwise, the
        sensor_data is set to the detection's region of interest (ROI).

    """
    # File format: frame index, track id, bbox (x, y, w, h), confidence,
    # world (x, y, z). The track id is always -1.
    data = np.loadtxt(filename, delimiter=',')
    has_threed = np.any(data[:, 7:10] != -1)
    min_frame_idx = int(data[:, 0].min())
    max_frame_idx = int(data[:, 0].max())
    detections = {i: [] for i in range(min_frame_idx, max_frame_idx + 1)}
    for row in data:
        confidence = row[6]
        if min_confidence is not None and confidence < min_confidence:
            continue
        frame_idx, roi = int(row[0]), row[2:6]
        xyz = row[7:10] if has_threed else None
        detections[frame_idx].append(
            pymotutils.RegionOfInterestDetection(
                frame_idx, roi, confidence, xyz=xyz))
    return detections


def generate_detections(index_to_bgr_filenames, inference_graph_pb,
                        class_to_name, min_confidence=0.5,
                        max_bbox_overlap=0.7, verbose=False):
    """Generate detections from a list of image filenames.

    Parameters
    ----------
    index_to_bgr_filenames : Dict[int, str]
        Maps from frame index to image filename. The frame index is used to
        populate the RegionOfInterestDetection.frame_idx attribute.
    inference_graph_pb : str
        Path to the frozen_inference_graph.pb file. This file is contained
        in the model archive.
    class_to_name : Dict[int, str]
        A dictionary that maps from label to class name. Classes that are
        not contained in the dictionary are suppressed. Use MSCOCO_LABELMAP
        for networks trained on MSCOCO.
    min_confidence : float
        Minimum detector confidence in [0, 1]. Detections with confidence
        lower than this value are suppressed.
    max_bbox_overlap : float
        Non-maxima suppression threshold in [0, 1]. A large value reduces
        the number of returned detections.
    verbose : bool
        If True, prints status information about the number of processed
        frames to standard output.

    Returns
    -------
    Dict[int, List[pymotutils.RegionOfInterestDetection]]
        Returns a dictionary that maps from frame index to list of
        detections.

    """
    detector = Detector(inference_graph_pb)
    detections = dict()
    num_processed = 0
    for frame_idx, filename in sorted(index_to_bgr_filenames.items()):
        if verbose:
            print("Processing detection on frame %d out of %d" %
                  (num_processed + 1, len(index_to_bgr_filenames)))
        num_processed += 1
        bgr_image = cv2.imread(filename, cv2.IMREAD_COLOR)
        boxes, scores, classes = detector.run(
            bgr_image, min_confidence, max_bbox_overlap)
        # Suppress classes that are not contained in the label map.
        keep = [i for i in range(len(boxes)) if classes[i] in class_to_name]
        boxes, scores, classes = boxes[keep], scores[keep], classes[keep]
        class_names = [class_to_name[x] for x in classes]
        # Store a list (not a set) of detections per frame, as documented.
        detections[frame_idx] = [
            pymotutils.RegionOfInterestDetection(
                frame_idx, boxes[i], scores[i], class_label=classes[i],
                class_name=class_names[i])
            for i in range(len(boxes))
        ]
    return detections
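

def _demo_generate_detections():
    # A hedged usage sketch; the image paths, frame range, and model file
    # are hypothetical, and MSCOCO_LABELMAP is assumed to be defined in this
    # module as referenced in the docstring above.
    index_to_bgr_filenames = {i: "images/%06d.jpg" % i for i in range(1, 11)}
    return generate_detections(
        index_to_bgr_filenames, "frozen_inference_graph.pb", MSCOCO_LABELMAP,
        min_confidence=0.5, max_bbox_overlap=0.7, verbose=True)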