def split_labels_train_val(
    labels: sleap.Labels, validation_fraction: float
) -> Tuple[sleap.Labels, List[int], sleap.Labels, List[int]]:
    """Make a train/validation split from a labels dataset.

    Args:
        labels: A `sleap.Labels` dataset with labeled frames.
        validation_fraction: Fraction of frames to use for validation.

    Returns:
        A tuple of `(labels_train, idx_train, labels_val, idx_val)`.

        `labels_train` and `labels_val` are `sleap.Label` objects containing the
        selected frames for each split. Their `videos`, `tracks` and `provenance`
        attributes are identical to `labels` even if the split does not contain
        instances with a particular video or track.

        `idx_train` and `idx_val` are list indices of the labeled frames within the
        input labels that were assigned to each split, i.e.:

        `labels[idx_train] == labels_train[:]`

        If there is only one labeled frame in `labels`, both of the labels will
        contain the same frame.

        If `validation_fraction` would result in fewer than one label for either
        split, it will be rounded to ensure there is at least one label in each.
    """
    # A single frame cannot be partitioned; reuse it for both splits.
    if len(labels) == 1:
        return labels, [0], labels, [0]

    # Clamp the validation count into [1, n - 1] so both splits are non-empty.
    n_total = len(labels)
    n_val = min(max(round(n_total * validation_fraction), 1), n_total - 1)
    idx_train, idx_val = train_test_split(list(range(n_total)), test_size=n_val)

    def _make_split(indices: List[int]) -> sleap.Labels:
        # Build a Labels from the selected frames, carrying over the full
        # metadata from the source dataset.
        split = sleap.Labels(labels[indices])
        split.videos = labels.videos
        split.tracks = labels.tracks
        split.provenance = labels.provenance
        return split

    return _make_split(idx_train), idx_train, _make_split(idx_val), idx_val
def test_labels():
    """Build a synthetic `sleap.Labels` fixture: 8 frames of a 2-node skeleton.

    Every frame has one instance; frames 3 and later carry a second instance,
    with point coordinates offset by the frame index.
    """
    skel = sleap.Skeleton()
    skel.add_node("a")
    skel.add_node("b")
    vid = sleap.Video.from_numpy(np.zeros((8, 12, 12, 1), dtype="uint8"))

    labels = sleap.Labels()
    for fidx in range(len(vid)):
        # Collect raw point arrays first, then wrap them all as instances.
        point_sets = [np.array([[1, 2], [3, 4]]) + fidx]
        if fidx >= 3:
            point_sets.append(np.array([[5, 6], [7, 8]]) + fidx)
        insts = [
            sleap.Instance.from_pointsarray(points=pts, skeleton=skel)
            for pts in point_sets
        ]
        labels.append(sleap.LabeledFrame(video=vid, frame_idx=fidx, instances=insts))
    return labels
def predict(
    self,
    data_provider: Provider,
    make_instances: bool = True,
    make_labels: bool = False,
):
    """Run inference over a data source, with timing and FPS logging.

    Args:
        data_provider: A `Provider`, or a raw `sleap.Labels` / `sleap.Video`
            which will be wrapped in the matching reader.
        make_instances: If True, convert raw predictions to labeled frames.
        make_labels: If True, additionally wrap the frames in `sleap.Labels`.

    Returns:
        A `sleap.Labels`, a list of labeled frames, or the raw prediction
        examples, depending on the flags.
    """
    t0_gen = time.time()

    # Accept raw Labels/Video inputs by wrapping them in the right provider.
    if isinstance(data_provider, sleap.Labels):
        data_provider = LabelsReader(data_provider)
    elif isinstance(data_provider, sleap.Video):
        data_provider = VideoReader(data_provider)

    generator = self.predict_generator(data_provider)

    if not (make_instances or make_labels):
        # Raw-example path: just drain the generator and report throughput.
        examples = list(generator)
        elapsed = time.time() - t0_gen
        logger.info(
            f"Predicted {len(examples)} examples in {elapsed:.3f} secs [{len(examples)/elapsed:.1f} FPS]"
        )
        return examples

    lfs = self.make_labeled_frames_from_generator(generator, data_provider)
    elapsed = time.time() - t0_gen
    logger.info(
        f"Predicted {len(lfs)} labeled frames in {elapsed:.3f} secs [{len(lfs)/elapsed:.1f} FPS]"
    )
    return sleap.Labels(lfs) if make_labels else lfs
def test_labels_reader_subset(min_labels):
    """`LabelsReader` with `example_indices` yields only those frames, in order."""
    # Repeat the same frame three times so there are indices to select from.
    labels = sleap.Labels([min_labels[0]] * 3)
    assert len(labels) == 3

    reader = providers.LabelsReader(labels, example_indices=[2, 1])
    assert len(reader) == 2

    examples = list(iter(reader.make_dataset()))
    assert len(examples) == 2
    # Output ordering follows example_indices, not the dataset ordering.
    assert examples[0]["example_ind"] == 2
    assert examples[1]["example_ind"] == 1
def split_labels(
    labels: sleap.Labels, split_fractions: Sequence[float]
) -> Tuple[sleap.Labels]:
    """Split a `sleap.Labels` into multiple new ones with random subsets of the data.

    Args:
        labels: An instance of `sleap.Labels`.
        split_fractions: One or more floats between 0 and 1 that specify the fraction
            of examples that should be in each dataset. These should add up to <= 1.0.
            Fractions of less than 1 element will be rounded up to ensure that there
            is at least 1 element in each split. One of the fractions may be -1 to
            indicate that it should contain all elements left over from the other
            splits.

    Returns:
        A tuple of new `sleap.Labels` instances of the same length as
        `split_fractions`.

    Raises:
        ValueError: If more than one split fraction is specified as -1.
        ValueError: If the splits add up to more than the total available examples.

    Note:
        Sampling is done without replacement.
    """
    # Get indices for labeled frames.
    labels_indices = np.arange(len(labels)).astype("int64")

    # Compute split sizes.
    n_examples = len(labels_indices)
    n_examples_per_split = np.array(split_fractions).astype("float64")
    if (n_examples_per_split == -1).sum() > 1:
        raise ValueError("Only one split fraction can be specified as -1.")
    # Mark the "remainder" split with NaN so the ceil below leaves it alone.
    # Note: np.nan, not np.NaN — the latter alias was removed in NumPy 2.0.
    n_examples_per_split[n_examples_per_split == -1] = np.nan
    # Round up so every explicitly-sized split gets at least one example.
    n_examples_per_split = np.ceil(n_examples_per_split * n_examples)
    # The remainder split absorbs whatever is left (but never fewer than 1).
    n_examples_per_split[np.isnan(n_examples_per_split)] = np.maximum(
        n_examples - np.nansum(n_examples_per_split), 1
    )
    n_examples_per_split = n_examples_per_split.astype("int64")
    if n_examples_per_split.sum() > n_examples:
        raise ValueError("Splits cannot sum to more than the total input labels.")

    # Sample and create new Labels instances. Create the RNG once rather than
    # re-seeding from the OS on every iteration; also avoid shadowing this
    # function's own name with the accumulator list.
    rng = np.random.default_rng()
    splits = []
    for n_samples in n_examples_per_split:
        # Sample without replacement from the remaining indices.
        sampled_indices = rng.choice(labels_indices, size=n_samples, replace=False)

        # Create new instance.
        splits.append(sleap.Labels([labels[int(ind)] for ind in sampled_indices]))

        # Exclude the sampled indices from the available indices.
        labels_indices = np.setdiff1d(labels_indices, sampled_indices)

    return tuple(splits)
def predict(
    self,
    data_provider: Provider,
    make_instances: bool = True,
    make_labels: bool = False,
):
    """Run inference over a data provider.

    Args:
        data_provider: The `Provider` to read examples from.
        make_instances: If True, convert predictions to labeled frames.
        make_labels: If True, additionally wrap the frames in `sleap.Labels`.

    Returns:
        A `sleap.Labels`, a list of labeled frames, or the raw prediction
        examples, depending on the flags.
    """
    generator = self.predict_generator(data_provider)

    if not (make_instances or make_labels):
        # Raw-example path: materialize the generator as-is.
        return list(generator)

    lfs = self.make_labeled_frames_from_generator(generator, data_provider)
    return sleap.Labels(lfs) if make_labels else lfs
def from_user_instances(cls, labels: sleap.Labels) -> "LabelsReader":
    """Create a `LabelsReader` using the user instances in a `Labels` set.

    Args:
        labels: A `sleap.Labels` instance containing user instances.

    Returns:
        A `LabelsReader` instance that can create a dataset for pipelining.
        Note that the examples may change in ordering relative to the input
        `labels`, so be sure to use the `labels` attribute in the returned
        instance.
    """
    # Keep only user-labeled frames, reduced to their training instances.
    frames = []
    for lf in labels.user_labeled_frames:
        frames.append(
            sleap.LabeledFrame(lf.video, lf.frame_idx, lf.training_instances)
        )
    return cls(labels=sleap.Labels(frames))
def test_labels_reader_multi_size():
    """`LabelsReader` should serve frames from differently sized videos."""
    # Create some fake data using two different size videos.
    skeleton = sleap.Skeleton.from_names_and_edge_inds(["A"])

    def make_frame(video):
        # One single-instance frame at index 0 for the given video.
        return sleap.LabeledFrame(
            frame_idx=0,
            video=video,
            instances=[
                sleap.Instance.from_pointsarray(
                    np.array([[128, 128]]), skeleton=skeleton
                )
            ],
        )

    labels = sleap.Labels(
        [
            make_frame(
                sleap.Video.from_filename(TEST_SMALL_ROBOT_MP4_FILE, grayscale=True)
            ),
            make_frame(
                sleap.Video.from_filename(
                    TEST_H5_FILE, dataset="/box", input_format="channels_first"
                )
            ),
        ]
    )

    # Create a loader for those labels.
    labels_reader = providers.LabelsReader(labels)
    ds_iter = iter(labels_reader.make_dataset())

    # Check LabelReader can provide different shapes of individual samples.
    assert next(ds_iter)["image"].shape == (320, 560, 1)
    assert next(ds_iter)["image"].shape == (512, 512, 1)

    # Check util functions.
    h, w = labels_reader.max_height_and_width
    assert (h, w) == (512, 560)
    assert labels_reader.is_from_multi_size_videos
def test_random_flipper():
    # Fixture: one frame with two 3-node instances; "BL"/"BR" will later be
    # declared symmetric to exercise node swapping on flip.
    vid = sleap.Video.from_filename(
        "tests/data/json_format_v1/centered_pair_low_quality.mp4"
    )
    skel = sleap.Skeleton.from_names_and_edge_inds(["A", "BL", "BR"], [[0, 1], [0, 2]])
    labels = sleap.Labels(
        [
            sleap.LabeledFrame(
                video=vid,
                frame_idx=0,
                instances=[
                    sleap.Instance.from_pointsarray(
                        [[25, 50], [50, 25], [25, 25]], skeleton=skel
                    ),
                    sleap.Instance.from_pointsarray(
                        [[125, 150], [150, 125], [125, 125]], skeleton=skel
                    ),
                ],
            )
        ]
    )

    # Horizontal flip with probability 1 and NO symmetry declared: the image
    # is mirrored left-right and x-coordinates are reflected, but node order
    # within each instance is unchanged.
    p = labels.to_pipeline()
    p += sleap.nn.data.augmentation.RandomFlipper.from_skeleton(
        skel, horizontal=True, probability=1.0
    )
    ex = p.peek()
    np.testing.assert_array_equal(ex["image"], vid[0][0][:, ::-1])
    np.testing.assert_array_equal(
        ex["instances"],
        [
            [[358.0, 50.0], [333.0, 25.0], [358.0, 25.0]],
            [[258.0, 150.0], [233.0, 125.0], [258.0, 125.0]],
        ],
    )

    # Same horizontal flip WITH BL/BR symmetry: the two symmetric nodes are
    # also swapped (compare node order in the expected arrays above vs below).
    skel.add_symmetry("BL", "BR")
    p = labels.to_pipeline()
    p += sleap.nn.data.augmentation.RandomFlipper.from_skeleton(
        skel, horizontal=True, probability=1.0
    )
    ex = p.peek()
    np.testing.assert_array_equal(ex["image"], vid[0][0][:, ::-1])
    np.testing.assert_array_equal(
        ex["instances"],
        [
            [[358.0, 50.0], [358.0, 25.0], [333.0, 25.0]],
            [[258.0, 150.0], [258.0, 125.0], [233.0, 125.0]],
        ],
    )

    # Probability 0: the transform must be a no-op for image and points.
    p = labels.to_pipeline()
    p += sleap.nn.data.augmentation.RandomFlipper.from_skeleton(
        skel, horizontal=True, probability=0.0
    )
    ex = p.peek()
    np.testing.assert_array_equal(ex["image"], vid[0][0])
    np.testing.assert_array_equal(
        ex["instances"],
        [[[25, 50], [50, 25], [25, 25]], [[125, 150], [150, 125], [125, 125]]],
    )

    # Vertical flip (horizontal=False) with probability 1: image mirrored
    # top-bottom, y-coordinates reflected, symmetric nodes swapped.
    p = labels.to_pipeline()
    p += sleap.nn.data.augmentation.RandomFlipper.from_skeleton(
        skel, horizontal=False, probability=1.0
    )
    ex = p.peek()
    np.testing.assert_array_equal(ex["image"], vid[0][0][::-1, :])
    np.testing.assert_array_equal(
        ex["instances"],
        [[[25, 333], [25, 358], [50, 358]], [[125, 233], [125, 258], [150, 258]]],
    )
def test_size_matcher():
    # Create some fake data using two different size videos:
    # video 1 yields (320, 560, 1) frames, video 2 yields (512, 512, 1).
    skeleton = sleap.Skeleton.from_names_and_edge_inds(["A"])
    labels = sleap.Labels(
        [
            sleap.LabeledFrame(
                frame_idx=0,
                video=sleap.Video.from_filename(TEST_SMALL_ROBOT_MP4_FILE, grayscale=True),
                instances=[
                    sleap.Instance.from_pointsarray(np.array([[128, 128]]), skeleton=skeleton)
                ],
            ),
            sleap.LabeledFrame(
                frame_idx=0,
                video=sleap.Video.from_filename(TEST_H5_FILE, dataset="/box", input_format="channels_first"),
                instances=[
                    sleap.Instance.from_pointsarray(np.array([[128, 128]]), skeleton=skeleton)
                ],
            ),
        ]
    )

    # Create a loader for those labels and confirm the native sample shapes.
    labels_reader = providers.LabelsReader(labels)
    ds = labels_reader.make_dataset()
    ds_iter = iter(ds)
    assert next(ds_iter)["image"].shape == (320, 560, 1)
    assert next(ds_iter)["image"].shape == (512, 512, 1)

    def check_padding(image, from_y, to_y, from_x, to_x):
        # Assert the given region of the image is zero (i.e. padding).
        assert (image.numpy()[from_y:to_y, from_x:to_x] == 0).all()

    # Check SizeMatcher when target dims is not strictly larger than actual image dims.
    size_matcher = SizeMatcher(max_image_height=560, max_image_width=560)
    transform_iter = iter(size_matcher.transform_dataset(ds))
    im1 = next(transform_iter)["image"]
    assert im1.shape == (560, 560, 1)
    # padding should be on the bottom
    check_padding(im1, 321, 560, 0, 560)
    im2 = next(transform_iter)["image"]
    assert im2.shape == (560, 560, 1)

    # Variant 2: target matches video 1 exactly, so video 2 is scaled down.
    size_matcher = SizeMatcher(max_image_height=320, max_image_width=560)
    transform_iter = iter(size_matcher.transform_dataset(ds))
    im1 = next(transform_iter)["image"]
    assert im1.shape == (320, 560, 1)
    im2 = next(transform_iter)["image"]
    assert im2.shape == (320, 560, 1)
    # padding should be on the right
    check_padding(im2, 0, 320, 321, 560)

    # Check SizeMatcher when target is 'max' in both dimensions.
    size_matcher = SizeMatcher(max_image_height=512, max_image_width=560)
    transform_iter = iter(size_matcher.transform_dataset(ds))
    im1 = next(transform_iter)["image"]
    assert im1.shape == (512, 560, 1)
    # Check padding is on the bottom
    check_padding(im1, 320, 512, 0, 560)
    im2 = next(transform_iter)["image"]
    assert im2.shape == (512, 560, 1)
    # Check padding is on the right
    check_padding(im2, 0, 512, 512, 560)

    # Check SizeMatcher when target is larger in both dimensions.
    size_matcher = SizeMatcher(max_image_height=750, max_image_width=750)
    transform_iter = iter(size_matcher.transform_dataset(ds))
    im1 = next(transform_iter)["image"]
    assert im1.shape == (750, 750, 1)
    # Check padding is on the bottom
    check_padding(im1, 700, 750, 0, 750)
    im2 = next(transform_iter)["image"]
    assert im2.shape == (750, 750, 1)
def test_split_labels_train_val():
    """`split_labels_train_val` always yields non-empty train and val splits."""
    vid = sleap.Video(backend=sleap.io.video.MediaVideo)

    # One frame: both splits contain that same single frame for any fraction.
    labels = sleap.Labels([sleap.LabeledFrame(video=vid, frame_idx=0)])
    for fraction in (0, 0.1, 0.5, 1.0):
        train, train_inds, val, val_inds = split_labels_train_val(labels, fraction)
        assert len(train) == 1
        assert len(val) == 1

    # Two frames: one frame per split, and never the same frame in both.
    labels = sleap.Labels(
        [
            sleap.LabeledFrame(video=vid, frame_idx=0),
            sleap.LabeledFrame(video=vid, frame_idx=1),
        ]
    )
    for fraction in (0, 0.1, 0.5, 1.0):
        train, train_inds, val, val_inds = split_labels_train_val(labels, fraction)
        assert len(train) == 1
        assert len(val) == 1
        assert train[0].frame_idx != val[0].frame_idx

    # Three frames: split sizes track the (clamped) validation fraction.
    labels = sleap.Labels(
        [
            sleap.LabeledFrame(video=vid, frame_idx=0),
            sleap.LabeledFrame(video=vid, frame_idx=1),
            sleap.LabeledFrame(video=vid, frame_idx=2),
        ]
    )
    train, train_inds, val, val_inds = split_labels_train_val(labels, 0)
    assert len(train) == 2
    assert len(val) == 1
    train, train_inds, val, val_inds = split_labels_train_val(labels, 0.1)
    assert len(train) == 2
    assert len(val) == 1
    train, train_inds, val, val_inds = split_labels_train_val(labels, 0.5)
    assert len(train) + len(val) == 3
    train, train_inds, val, val_inds = split_labels_train_val(labels, 1.0)
    assert len(train) == 1
    assert len(val) == 2