def cut_set_with_mixed_cut(cut1, cut2):
    """Return a CutSet holding ``cut1``, ``cut2`` and a MixedCut built from both.

    The mixed cut has the fixed id ``"mixed-cut-id"`` and two tracks: ``cut1``
    with default track settings, and ``cut2`` with ``offset=1.0`` and ``snr=10``.
    The resulting CutSet is keyed by each cut's ``id``.
    """
    tracks = [
        MixTrack(cut=cut1),
        MixTrack(cut=cut2, offset=1.0, snr=10),
    ]
    mixed_cut = MixedCut(id="mixed-cut-id", tracks=tracks)

    # Insertion order matters for dict-backed containers: cut1, cut2, then the mix.
    cuts_by_id = {}
    for member in (cut1, cut2, mixed_cut):
        cuts_by_id[member.id] = member
    return CutSet(cuts_by_id)
def split(manifest: Manifest, num_splits: int, randomize: bool = False) -> List[Manifest]:
    """Split a manifest into `num_splits` parts of (nearly) equal size.

    Items are distributed as evenly as possible: when the number of items is
    not divisible by ``num_splits``, the first ``num_items % num_splits`` parts
    receive one extra item. Every returned part is therefore non-empty and
    part sizes differ by at most one.

    :param manifest: a RecordingSet, SupervisionSet, FeatureSet or CutSet.
    :param num_splits: how many parts to produce; must be between 1 and
        ``len(manifest)`` inclusive.
    :param randomize: when True, the items are shuffled (with the global
        ``random`` module state) before being sliced into parts.
    :return: a list of ``num_splits`` manifests of the same type as the input.
    :raises ValueError: if ``num_splits`` is not positive, exceeds the number
        of items, or the manifest type is not one of the supported ones.
    """
    num_items = len(manifest)
    if num_splits < 1:
        raise ValueError(
            f"Cannot split manifest into {num_splits} chunks: the number of splits must be positive"
        )
    if num_splits > num_items:
        raise ValueError(
            f"Cannot split manifest into more chunks ({num_splits}) than its number of items {num_items}"
        )

    # A fixed ceil-sized stride can exhaust the items before the last chunk
    # (e.g. 5 items into 4 parts left the final part empty). Spread the
    # remainder over the leading chunks instead.
    base_size, remainder = divmod(num_items, num_splits)
    split_indices = []
    begin = 0
    for part_idx in range(num_splits):
        end = begin + base_size + (1 if part_idx < remainder else 0)
        split_indices.append((begin, end))
        begin = end

    def maybe_randomize(items: Iterable[Any]) -> List[Any]:
        # Always materialize into a new list so the slices below work and the
        # shuffle never touches the manifest's own container.
        items = list(items)
        if randomize:
            random.shuffle(items)
        return items

    if isinstance(manifest, RecordingSet):
        contents = maybe_randomize(manifest.recordings.items())
        return [
            RecordingSet(recordings=dict(contents[begin:end]))
            for begin, end in split_indices
        ]

    if isinstance(manifest, SupervisionSet):
        contents = maybe_randomize(manifest.segments.items())
        return [
            SupervisionSet(segments=dict(contents[begin:end]))
            for begin, end in split_indices
        ]

    if isinstance(manifest, FeatureSet):
        # Features are sliced directly (no dict round-trip); every part
        # shares the original feature extractor.
        contents = maybe_randomize(manifest.features)
        return [
            FeatureSet(features=contents[begin:end], feature_extractor=manifest.feature_extractor)
            for begin, end in split_indices
        ]

    if isinstance(manifest, CutSet):
        contents = maybe_randomize(manifest.cuts.items())
        return [
            CutSet(cuts=dict(contents[begin:end]))
            for begin, end in split_indices
        ]

    raise ValueError(f"Unknown type of manifest: {type(manifest)}")
def cuts():
    """Build a large CutSet by concatenating 100 re-identified copies of a fixture.

    The cut set is loaded from ``test/fixtures/libri/cuts.json``; copy number
    ``k`` (0..99) has ``str(k)`` appended to every cut id, and the copies are
    added one after another onto an initially empty ``CutSet``.
    """
    base_cuts = CutSet.from_file("test/fixtures/libri/cuts.json")

    # Start from an empty CutSet and fold the copies in with `+`, left to right.
    combined = CutSet()
    for copy_no in range(100):
        # NOTE(review): the lambda closes over the loop variable; this relies on
        # modify_ids applying it before the next iteration - confirm.
        renamed = base_cuts.modify_ids(lambda cid: cid + str(copy_no))
        combined = combined + renamed
    return combined