Example #1
def simple_predictions():

    video = Video.from_filename("video.mp4")

    skeleton = Skeleton()
    skeleton.add_node("a")
    skeleton.add_node("b")

    track_a = Track(0, "a")
    track_b = Track(0, "b")

    labels = Labels()

    instances = []
    instances.append(
        PredictedInstance(
            skeleton=skeleton,
            score=2,
            track=track_a,
            points=dict(a=PredictedPoint(1, 1, score=0.5),
                        b=PredictedPoint(1, 1, score=0.5)),
        ))
    instances.append(
        PredictedInstance(
            skeleton=skeleton,
            score=5,
            track=track_b,
            points=dict(a=PredictedPoint(1, 1, score=0.7),
                        b=PredictedPoint(1, 1, score=0.7)),
        ))

    labeled_frame = LabeledFrame(video, frame_idx=0, instances=instances)
    labels.append(labeled_frame)

    instances = []
    instances.append(
        PredictedInstance(
            skeleton=skeleton,
            score=3,
            track=track_a,
            points=dict(a=PredictedPoint(4, 5, score=1.5),
                        b=PredictedPoint(1, 1, score=1.0)),
        ))
    instances.append(
        PredictedInstance(
            skeleton=skeleton,
            score=6,
            track=track_b,
            points=dict(a=PredictedPoint(6, 13, score=1.7),
                        b=PredictedPoint(1, 1, score=1.0)),
        ))

    labeled_frame = LabeledFrame(video, frame_idx=1, instances=instances)
    labels.append(labeled_frame)

    return labels
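A minimal usage sketch for the fixture above. The import paths are assumptions about the sleap package layout and may vary by version:

# Hypothetical usage; import paths assume sleap's module layout.
from sleap.instance import LabeledFrame, PredictedInstance, PredictedPoint, Track
from sleap.io.dataset import Labels
from sleap.io.video import Video
from sleap.skeleton import Skeleton

labels = simple_predictions()
assert len(labels.labeled_frames) == 2
for lf in labels:
    for inst in lf.instances:
        print(lf.frame_idx, inst.track.name, inst.score)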
Example #2
def multi_skel_vid_labels(hdf5_vid, small_robot_mp4_vid, skeleton, stickman):
    """
    Build a big list of LabeledFrame objects and wrap it in a Labels object.

    Args:
        hdf5_vid: An HDF5 video fixture
        small_robot_mp4_vid: An MP4 video fixture
        skeleton: A fly skeleton.
        stickman: A stickman skeleton.

    Returns:
        The Labels object containing all the labeled frames
    """
    labels = []
    stick_tracks = [
        Track(spawned_on=0, name=f"Stickman {i}") for i in range(6)
    ]
    fly_tracks = [Track(spawned_on=0, name=f"Fly {i}") for i in range(6)]

    # Make some tracks None to test that case
    fly_tracks[3] = None
    stick_tracks[2] = None

    for f in range(500):
        vid = [hdf5_vid, small_robot_mp4_vid][f % 2]
        label = LabeledFrame(video=vid, frame_idx=f % vid.frames)

        fly_instances = []
        for i in range(6):
            fly_instances.append(
                Instance(skeleton=skeleton, track=fly_tracks[i]))
            for node in skeleton.nodes:
                fly_instances[i][node] = Point(x=i % vid.width,
                                               y=i % vid.height)

        stickman_instances = []
        for i in range(6):
            stickman_instances.append(
                Instance(skeleton=stickman, track=stick_tracks[i]))
            for node in stickman.nodes:
                stickman_instances[i][node] = Point(x=i % vid.width,
                                                    y=i % vid.height)

        label.instances = stickman_instances + fly_instances
        labels.append(label)

    labels = Labels(labels)

    return labels
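Since frames alternate between the two videos, a quick sanity check is to filter the result by video. A sketch, assuming Labels.find(video) returns the labeled frames for that video:

# Sketch; assumes Labels.find(video) filters frames by video.
labels = multi_skel_vid_labels(hdf5_vid, small_robot_mp4_vid, skeleton, stickman)
print(len(labels.find(hdf5_vid)), "frames on the HDF5 video")
print(len(labels.find(small_robot_mp4_vid)), "frames on the MP4 video")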
Example #3
def test_many_tracks_hdf5(tmpdir):
    labels = Labels()
    filename = os.path.join(tmpdir, "test.h5")

    labels.tracks = [Track(spawned_on=i, name=f"track {i}") for i in range(4000)]

    Labels.save_hdf5(filename=filename, labels=labels)
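A round-trip sketch to confirm the tracks survive serialization. That Labels.load_file can read the file written by Labels.save_hdf5 is an assumption here:

# Sketch; reading the saved file back with Labels.load_file is an assumption.
loaded = Labels.load_file(filename)
assert len(loaded.tracks) == 4000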
Example #4
def test_multivideo_tracks():
    vid_a = Video.from_filename("foo.mp4")
    vid_b = Video.from_filename("bar.mp4")

    skeleton = Skeleton.load_json("tests/data/skeleton/fly_skeleton_legs.json")

    track_a = Track(spawned_on=2, name="A")
    track_b = Track(spawned_on=3, name="B")

    inst_a = Instance(track=track_a, skeleton=skeleton)
    inst_b = Instance(track=track_b, skeleton=skeleton)

    lf_a = LabeledFrame(vid_a, frame_idx=2, instances=[inst_a])
    lf_b = LabeledFrame(vid_b, frame_idx=3, instances=[inst_b])

    labels = Labels(labeled_frames=[lf_a, lf_b])

    # Try setting video B instance to track used in video A
    labels.track_swap(vid_b, new_track=track_a, old_track=track_b, frame_range=(3, 4))

    assert inst_b.track == track_a
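For illustration, here is a hand-rolled stand-in for the reassignment this test exercises. This is not SLEAP's track_swap implementation, and treating frame_range as half-open is an assumption:

# Illustrative stand-in only, not SLEAP's implementation.
def swap_tracks(labeled_frames, video, new_track, old_track, frame_range):
    start, end = frame_range  # treated as half-open here (an assumption)
    for lf in labeled_frames:
        if lf.video is video and start <= lf.frame_idx < end:
            for inst in lf.instances:
                if inst.track is old_track:
                    inst.track = new_track
                elif inst.track is new_track:
                    inst.track = old_track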
Example #5
    def read(
        cls,
        file: FileHandle,
        video_path: str,
        skeleton_path: str,
        *args,
        **kwargs,
    ) -> Labels:
        f = file.file

        video = Video.from_filename(video_path)
        skeleton_data = pd.read_csv(skeleton_path, header=0)

        skeleton = Skeleton()
        skeleton.add_nodes(skeleton_data["name"])
        nodes = skeleton.nodes

        for name, parent, swap in skeleton_data.itertuples(index=False,
                                                           name=None):
            if parent is not np.nan:
                skeleton.add_edge(parent, name)

        lfs = []

        pose_matrix = f["pose"][:]

        track_count, frame_count, node_count, _ = pose_matrix.shape

        tracks = [Track(0, f"Track {i}") for i in range(track_count)]
        for frame_idx in range(frame_count):
            lf_instances = []
            for track_idx in range(track_count):
                points_array = pose_matrix[track_idx, frame_idx, :, :]
                points = dict()
                for p in range(len(points_array)):
                    x, y, score = points_array[p]
                    points[nodes[p]] = Point(x, y)  # TODO: score

                inst = Instance(skeleton=skeleton,
                                track=tracks[track_idx],
                                points=points)
                lf_instances.append(inst)
            lfs.append(
                LabeledFrame(video,
                             frame_idx=frame_idx,
                             instances=lf_instances))

        return Labels(labeled_frames=lfs)
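The skeleton CSV this reader consumes can be sketched self-containedly. The name/parent/swap column layout is inferred from how skeleton_data is used above:

# Self-contained sketch of the expected skeleton CSV layout.
import io

import numpy as np
import pandas as pd

csv_text = "name,parent,swap\nhead,,False\nthorax,head,False\nabdomen,thorax,False\n"
skeleton_data = pd.read_csv(io.StringIO(csv_text), header=0)
for name, parent, swap in skeleton_data.itertuples(index=False, name=None):
    if parent is not np.nan:
        print(f"edge: {parent} -> {name}")  # head has no parent, so it is skipped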
Example #6
    def read(
        cls,
        file: FileHandle,
        video: Union[Video, str],
        *args,
        **kwargs,
    ) -> Labels:
        connect_adj_nodes = False

        if video is None:
            raise ValueError(
                "Cannot read analysis hdf5 if no video specified.")

        if not isinstance(video, Video):
            video = Video.from_filename(video)

        f = file.file
        tracks_matrix = f["tracks"][:].T
        track_names_list = f["track_names"][:].T
        node_names_list = f["node_names"][:].T

        # shape: frames * nodes * 2 * tracks
        frame_count, node_count, _, track_count = tracks_matrix.shape

        tracks = [
            Track(0, track_name.decode()) for track_name in track_names_list
        ]

        skeleton = Skeleton()
        last_node_name = None
        for node_name in node_names_list:
            node_name = node_name.decode()
            skeleton.add_node(node_name)
            if connect_adj_nodes and last_node_name:
                skeleton.add_edge(last_node_name, node_name)
            last_node_name = node_name

        frames = []
        for frame_idx in range(frame_count):
            instances = []
            for track_idx in range(track_count):
                points = tracks_matrix[frame_idx, ..., track_idx]
                if not np.all(np.isnan(points)):
                    point_scores = np.ones(len(points))
                    # make everything a PredictedInstance since the usual use
                    # case is to export predictions for analysis
                    instances.append(
                        PredictedInstance.from_arrays(
                            points=points,
                            point_confidences=point_scores,
                            skeleton=skeleton,
                            track=tracks[track_idx],
                            instance_score=1,
                        ))
            if instances:
                frames.append(
                    LabeledFrame(video=video,
                                 frame_idx=frame_idx,
                                 instances=instances))

        return Labels(labeled_frames=frames)
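Why the .T works: NumPy's .T reverses all axes, so a dataset stored as tracks x 2 x nodes x frames comes out as frames x nodes x 2 x tracks. A pure-NumPy sketch:

# Pure-numpy sketch of the transpose used above.
import numpy as np

on_disk = np.zeros((3, 2, 5, 100))  # tracks x 2 x nodes x frames
tracks_matrix = on_disk.T
print(tracks_matrix.shape)  # (100, 5, 2, 3): frames x nodes x 2 x tracks
frame_count, node_count, _, track_count = tracks_matrix.shape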
Example #7
    def read(
        cls,
        file: FileHandle,
        img_dir: str,
        use_missing_gui: bool = False,
        *args,
        **kwargs,
    ) -> Labels:

        dicts = file.json

        # Make skeletons from "categories"
        skeleton_map = dict()
        for category in dicts["categories"]:
            skeleton = Skeleton(name=category["name"])
            skeleton_id = category["id"]
            node_names = category["keypoints"]
            skeleton.add_nodes(node_names)

            try:
                for src_idx, dst_idx in category["skeleton"]:
                    skeleton.add_edge(node_names[src_idx], node_names[dst_idx])
            except IndexError:
                # According to the COCO data format specification [1], the
                # edges are supposed to be 1-indexed, but some of COCO's own
                # datasets use 0-indexed edges. So we try 0-indexing first and
                # fall back to 1-indexing if that raises an IndexError.
                # [1]: http://cocodataset.org/#format-data

                # Clear any edges we already created using 0-indexing
                skeleton.clear_edges()

                # Add edges using 1-indexing
                for src_idx, dst_idx in category["skeleton"]:
                    skeleton.add_edge(node_names[src_idx - 1], node_names[dst_idx - 1])

            skeleton_map[skeleton_id] = skeleton

        # Make videos from "images"

        # Remove images that aren't referenced in the annotations
        img_refs = [annotation["image_id"] for annotation in dicts["annotations"]]
        dicts["images"] = list(filter(lambda im: im["id"] in img_refs, dicts["images"]))

        # Key in JSON file should be "file_name", but sometimes it's "filename",
        # so we have to check both.
        img_filename_key = "file_name"
        if img_filename_key not in dicts["images"][0].keys():
            img_filename_key = "filename"

        # First add the img_dir to each image filename
        img_paths = [
            os.path.join(img_dir, image[img_filename_key]) for image in dicts["images"]
        ]

        # See if there are any missing files
        img_missing = [not os.path.exists(path) for path in img_paths]

        if sum(img_missing):
            if use_missing_gui:
                okay = MissingFilesDialog(img_paths, img_missing).exec_()

                if not okay:
                    return None
            else:
                raise FileNotFoundError(
                    f"Images for COCO dataset could not be found in {img_dir}."
                )

        # Update the image paths (with img_dir or user selected path)
        for image, path in zip(dicts["images"], img_paths):
            image[img_filename_key] = path

        # Create the video objects for the image files
        image_video_map = dict()

        vid_id_video_map = dict()
        for image in dicts["images"]:
            image_id = image["id"]
            image_filename = image[img_filename_key]

            # Sometimes images have a vid_id which links multiple images
            # together as one video. If so, we'll use that as the video key.
            # But if there isn't a vid_id, we'll treat each image as a
            # distinct video and use the image id as the video id.
            vid_id = image.get("vid_id", image_id)

            if vid_id not in vid_id_video_map:
                kwargs = dict(filenames=[image_filename])
                for key in ("width", "height"):
                    if key in image:
                        kwargs[key] = image[key]

                video = Video.from_image_filenames(**kwargs)
                vid_id_video_map[vid_id] = video
                frame_idx = 0
            else:
                video = vid_id_video_map[vid_id]
                frame_idx = video.num_frames
                video.backend.filenames.append(image_filename)

            image_video_map[image_id] = (video, frame_idx)

        # Make instances from "annotations"
        lf_map = dict()
        track_map = dict()
        for annotation in dicts["annotations"]:
            skeleton = skeleton_map[annotation["category_id"]]
            image_id = annotation["image_id"]
            video, frame_idx = image_video_map[image_id]
            keypoints = np.array(annotation["keypoints"], dtype="int").reshape(-1, 3)

            track = None
            if "track_id" in annotation:
                track_id = annotation["track_id"]
                if track_id not in track_map:
                    track_map[track_id] = Track(frame_idx, str(track_id))
                track = track_map[track_id]

            points = dict()
            any_visible = False
            for i in range(len(keypoints)):
                node = skeleton.nodes[i]
                x, y, flag = keypoints[i]

                if flag == 0:
                    # node not labeled for this instance
                    continue

                is_visible = flag == 2
                any_visible = any_visible or is_visible
                points[node] = Point(x, y, is_visible)

            if points:
                # If none of the points had 2 as the "visible" flag, we'll
                # assume this is incorrect and just mark them all as visible.
                if not any_visible:
                    for point in points.values():
                        point.visible = True

                inst = Instance(skeleton=skeleton, points=points, track=track)

                if image_id not in lf_map:
                    lf_map[image_id] = LabeledFrame(video, frame_idx)

                lf_map[image_id].insert(0, inst)

        return Labels(labeled_frames=list(lf_map.values()))
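The keypoint decoding above follows COCO's triplet encoding: each node is (x, y, flag), with flag 0 = not labeled, 1 = labeled but not visible, and 2 = labeled and visible. A self-contained sketch:

# Self-contained sketch of decoding COCO keypoint triplets.
import numpy as np

keypoints = np.array([10, 20, 2, 0, 0, 0, 30, 40, 1], dtype="int").reshape(-1, 3)
for x, y, flag in keypoints:
    if flag == 0:
        continue  # not labeled; the loader skips these nodes entirely
    print(x, y, "visible" if flag == 2 else "not visible")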
Example #8
File: tracking.py Project: jens-k/sleap
    def track(
        self,
        untracked_instances: List[InstanceType],
        img: Optional[np.ndarray] = None,
        t: Optional[int] = None,
    ) -> List[InstanceType]:
        """Performs a single step of tracking.

        Args:
            untracked_instances: List of instances to assign to tracks.
            img: Image data of the current frame for flow shifting.
            t: Current timestep. If not provided, this is inferred as one past
                the last timestep in the matching queue, or 0 if the queue is
                empty.

        Returns:
            A list of the instances that were tracked.
        """

        if self.candidate_maker is None:
            return untracked_instances

        # Infer timestep if not provided.
        if t is None:
            if len(self.track_matching_queue) > 0:

                # Default to last timestep + 1 if available.
                t = self.track_matching_queue[-1].t + 1

            else:
                t = 0

        # Initialize containers for tracked instances at the current timestep.
        tracked_instances = []
        tracked_inds = []

        # Make cache so similarity function doesn't have to recompute everything.
        # similarity_cache = dict()

        # Process untracked instances.
        if len(untracked_instances) > 0:

            # Build a pool of matchable candidate instances.
            candidate_instances = self.candidate_maker.get_candidates(
                track_matching_queue=self.track_matching_queue,
                t=t,
                img=img,
            )

            if len(candidate_instances) > 0:

                # Group candidate instances by track.
                candidate_instances_by_track = defaultdict(list)
                for instance in candidate_instances:
                    candidate_instances_by_track[instance.track].append(
                        instance)

                # Compute similarity matrix between untracked instances and best
                # candidate for each track.
                candidate_tracks = list(candidate_instances_by_track.keys())
                matching_similarities = np.full(
                    (len(untracked_instances), len(candidate_tracks)), np.nan)
                matching_candidates = []

                for i, untracked_instance in enumerate(untracked_instances):
                    matching_candidates.append([])

                    for j, candidate_track in enumerate(candidate_tracks):

                        # Compute similarity between untracked instance and all track
                        # candidates.
                        track_instances = candidate_instances_by_track[
                            candidate_track]
                        track_matching_similarities = [
                            self.similarity_function(
                                untracked_instance,
                                candidate_instance,
                                # cache=similarity_cache
                            ) for candidate_instance in track_instances
                        ]

                        # Keep the best scoring instance for this track.
                        best_ind = np.argmax(track_matching_similarities)
                        matching_candidates[i].append(
                            track_instances[best_ind])

                        # Use the best similarity score for matching.
                        best_similarity = track_matching_similarities[best_ind]
                        matching_similarities[i, j] = best_similarity

                # Perform matching between untracked instances and candidates.
                cost = -matching_similarities
                cost[np.isnan(cost)] = np.inf
                matches = self.matching_function(cost)

                # Assign each matched instance.
                for i, j in matches:
                    # Pull out matched pair.
                    matched_instance = untracked_instances[i]
                    ref_instance = matching_candidates[i][j]

                    # Save matching score.
                    match_similarity = matching_similarities[i, j]

                    # Assign to track and save.
                    tracked_instances.append(
                        attr.evolve(
                            matched_instance,
                            track=ref_instance.track,
                            tracking_score=match_similarity,
                        ))

                    # Keep track of the assigned instances.
                    tracked_inds.append(i)

        # Spawn a new track for each remaining untracked instance.
        for i, inst in enumerate(untracked_instances):

            # Skip if this instance was tracked.
            if i in tracked_inds:
                continue

            # Skip if this instance is too small to spawn a new track with.
            if inst.n_visible_points < self.min_new_track_points:
                continue

            # Spawn new track.
            new_track = Track(spawned_on=t,
                              name=f"track_{len(self.spawned_tracks)}")
            self.spawned_tracks.append(new_track)

            # Assign instance to the new track and save.
            tracked_instances.append(attr.evolve(inst, track=new_track))

        # Add the tracked instances to the matching buffer.
        self.track_matching_queue.append(
            MatchedInstance(t, tracked_instances, img))

        # Save tracked instances internally.
        if self.save_tracked_instances:
            self.tracked_instances[t] = tracked_instances

        return tracked_instances
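The matching step negates similarities into costs and solves an assignment problem. A self-contained sketch of one plausible matching_function using Hungarian matching via SciPy; whether a given tracker instance is configured with this matcher is an assumption:

# Sketch of a Hungarian-style matching_function.
import numpy as np
from scipy.optimize import linear_sum_assignment

def hungarian_matching(cost: np.ndarray):
    """Return (row, col) pairs minimizing total cost."""
    rows, cols = linear_sum_assignment(cost)
    return [(int(i), int(j)) for i, j in zip(rows, cols)]

similarities = np.array([[0.9, 0.1], [0.2, 0.8]])
cost = -similarities
cost[np.isnan(cost)] = np.inf  # mirror the NaN handling in track()
print(hungarian_matching(cost))  # [(0, 0), (1, 1)]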
Example #9
def load_predicted_labels_json_old(
    data_path: str,
    parsed_json: dict = None,
    adjust_matlab_indexing: bool = True,
    fix_rel_paths: bool = True,
) -> List[LabeledFrame]:
    """
    Load predicted instances from Talmo's old JSON format.

    Args:
        data_path: The path to the JSON file.
        parsed_json: The already-parsed JSON, if available, so we can skip
            re-parsing the file.
        adjust_matlab_indexing: Whether to adjust indexing from MATLAB.
        fix_rel_paths: Whether to fix paths to videos to absolute paths.

    Returns:
        List of :class:`LabeledFrame` objects.
    """
    if parsed_json is None:
        with open(data_path) as f:
            data = json.load(f)
    else:
        data = parsed_json

    videos = pd.DataFrame(data["videos"])
    predicted_instances = pd.DataFrame(data["predicted_instances"])
    predicted_points = pd.DataFrame(data["predicted_points"])

    if adjust_matlab_indexing:
        predicted_instances.frameIdx -= 1
        predicted_points.frameIdx -= 1
        predicted_points.node -= 1
        predicted_points.x -= 1
        predicted_points.y -= 1

    skeleton = Skeleton()
    skeleton.add_nodes(data["skeleton"]["nodeNames"])
    edges = data["skeleton"]["edges"]
    if adjust_matlab_indexing:
        edges = np.array(edges) - 1
    for (src_idx, dst_idx) in edges:
        skeleton.add_edge(
            data["skeleton"]["nodeNames"][src_idx],
            data["skeleton"]["nodeNames"][dst_idx],
        )

    if fix_rel_paths:
        for i, row in videos.iterrows():
            p = row.filepath
            if not os.path.exists(p):
                p = os.path.join(os.path.dirname(data_path), p)
                if os.path.exists(p):
                    videos.at[i, "filepath"] = p

    # Make the video objects
    video_objects = {}
    for i, row in videos.iterrows():
        if videos.at[i, "format"] == "media":
            vid = Video.from_media(videos.at[i, "filepath"])
        else:
            vid = Video.from_hdf5(
                filename=videos.at[i, "filepath"], dataset=videos.at[i, "dataset"]
            )

        video_objects[videos.at[i, "id"]] = vid

    track_ids = predicted_instances["trackId"].values
    unique_track_ids = np.unique(track_ids)

    spawned_on = {
        track_id: predicted_instances.loc[predicted_instances["trackId"] == track_id][
            "frameIdx"
        ].values[0]
        for track_id in unique_track_ids
    }
    tracks = {
        i: Track(name=str(i), spawned_on=spawned_on[i])
        for i in unique_track_ids.tolist()
    }

    # A function to get all the instances for a particular video frame
    def get_frame_predicted_instances(video_id, frame_idx):
        points = predicted_points
        is_in_frame = (points["videoId"] == video_id) & (
            points["frameIdx"] == frame_idx
        )
        if not is_in_frame.any():
            return []

        instances = []
        frame_instance_ids = np.unique(points["instanceId"][is_in_frame])
        for i, instance_id in enumerate(frame_instance_ids):
            is_instance = is_in_frame & (points["instanceId"] == instance_id)
            track_id = predicted_instances.loc[
                predicted_instances["id"] == instance_id
            ]["trackId"].values[0]
            match_score = predicted_instances.loc[
                predicted_instances["id"] == instance_id
            ]["matching_score"].values[0]
            track_score = predicted_instances.loc[
                predicted_instances["id"] == instance_id
            ]["tracking_score"].values[0]
            instance_points = {
                data["skeleton"]["nodeNames"][n]: PredictedPoint(
                    x, y, visible=v, score=confidence
                )
                for x, y, n, v, confidence in zip(
                    *[
                        points[k][is_instance]
                        for k in ["x", "y", "node", "visible", "confidence"]
                    ]
                )
            }

            instance = PredictedInstance(
                skeleton=skeleton,
                points=instance_points,
                track=tracks[track_id],
                score=match_score,
            )
            instances.append(instance)

        return instances

    # Get the unique labeled frames and construct a list of LabeledFrame objects for them.
    frame_keys = list(
        {
            (videoId, frameIdx)
            for videoId, frameIdx in zip(
                predicted_points["videoId"], predicted_points["frameIdx"]
            )
        }
    )
    frame_keys.sort()
    labels = []
    for videoId, frameIdx in frame_keys:
        label = LabeledFrame(
            video=video_objects[videoId],
            frame_idx=frameIdx,
            instances=get_frame_predicted_instances(videoId, frameIdx),
        )
        labels.append(label)

    return labels
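A hypothetical usage sketch; the JSON path is illustrative:

# Hypothetical usage; the path is illustrative.
frames = load_predicted_labels_json_old("old_predictions.json")
labels = Labels(labeled_frames=frames)
print(len(labels), "labeled frames across", len(labels.videos), "videos")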