Example #1
0
def test_mixed_cut_set_serialization(cut_set_with_mixed_cut, format, compressed):
    """
    Round-trip a CutSet containing a mixed cut through a manifest file.

    The CutSet is written to a temporary file in the requested ``format``
    ('yaml' or 'json'), read back from the same path, and compared with the
    original for equality. When ``compressed`` is truthy the temporary file
    name ends with '.gz' (presumably the serializers pick gzip compression
    from the extension -- confirm against the CutSet implementation).
    """
    suffix = '.gz' if compressed else ''
    with NamedTemporaryFile(suffix=suffix) as f:
        if format == 'yaml':
            cut_set_with_mixed_cut.to_yaml(f.name)
            restored = CutSet.from_yaml(f.name)
        elif format == 'json':
            cut_set_with_mixed_cut.to_json(f.name)
            restored = CutSet.from_json(f.name)
        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # `restored` in the assertion below.
            raise ValueError('Unsupported serialization format: ' + repr(format))
    assert cut_set_with_mixed_cut == restored
Example #2
0
def test_mixed_cut_load_features():
    """Check the frame count and duration of the fixture's mixed cut, then the loaded features."""
    manifest_path = 'test/fixtures/mix_cut_test/overlayed_cut_manifest.yml'
    num_frames = 1360

    cut = CutSet.from_yaml(manifest_path)['mixed-cut-id']
    assert cut.num_frames == num_frames
    assert isclose(cut.duration, 13.595)

    # The first axis of the loaded feature array must agree with the frame count.
    features = cut.load_features()
    assert features.shape[0] == num_frames
Example #3
0
def mix_by_recording_id(
        cut_manifests: List[Pathlike],
        output_cut_manifest: Pathlike
):
    """
    Create a CutSet stored in OUTPUT_CUT_MANIFEST by matching the Cuts from CUT_MANIFESTS by their recording IDs
    and mixing them together.
    Every group of Cuts sharing a recording ID yields one mixed Cut in the output.
    """
    # Merge all input manifests into one collection before grouping.
    all_cuts = combine(*[CutSet.from_yaml(path) for path in cut_manifests])
    recording_id_to_cuts = groupby(lambda cut: cut.recording_id, all_cuts)
    # Only the grouped cuts are needed for mixing; the recording ID keys are not used.
    mixed_cut_set = CutSet.from_cuts(mix_cuts(cuts) for cuts in recording_id_to_cuts.values())
    mixed_cut_set.to_yaml(output_cut_manifest)
Example #4
0
def mix_sequential(
        cut_manifests: List[Pathlike],
        output_cut_manifest: Pathlike
):
    """
    Mix the Cuts found on the same positions in CUT_MANIFESTS and store the resulting CutSet
    in OUTPUT_CUT_MANIFEST. E.g. the first output cut is created from the first cuts in each input manifest.
    The mix is performed by summing the features from all Cuts.
    If the CUT_MANIFESTS have different number of Cuts, the mixing ends when the shorter manifest is depleted.
    """
    cut_sets = [CutSet.from_yaml(manifest) for manifest in cut_manifests]
    # zip() stops at the shortest CutSet, so surplus cuts in longer manifests are dropped.
    mixed = (mix_cuts(aligned_cuts) for aligned_cuts in zip(*cut_sets))
    CutSet.from_cuts(mixed).to_yaml(output_cut_manifest)
Example #5
0
def append(
        cut_manifests: List[Pathlike],
        output_cut_manifest: Pathlike,
):
    """
    Create a new CutSet by appending the cuts in CUT_MANIFESTS and store it in OUTPUT_CUT_MANIFEST.
    CUT_MANIFESTS are iterated position-wise (the cuts on i'th position in each manifest are appended
    to each other), following the order in which the manifests appear in the input argument list.
    If CUT_MANIFESTS have different lengths, the script stops once the shortest CutSet is depleted.
    """
    loaded = [CutSet.from_yaml(manifest) for manifest in cut_manifests]
    # Each tuple produced by zip() holds the cuts sharing one position across all manifests.
    position_wise = zip(*loaded)
    result = CutSet.from_cuts(append_cuts(group) for group in position_wise)
    result.to_yaml(output_cut_manifest)
Example #6
0
def truncate(
        cut_manifest: Pathlike,
        output_cut_manifest: Pathlike,
        preserve_id: bool,
        max_duration: float,
        offset_type: str,
        keep_overflowing_supervisions: bool,
        random_seed: int
):
    """
    Read the cuts from CUT_MANIFEST, truncate them and write the result to OUTPUT_CUT_MANIFEST.
    Cuts shorter than MAX_DURATION will not be modified.
    """
    # Seed first so the truncation below runs with a fixed random state.
    fix_random_seed(random_seed)
    truncated = CutSet.from_yaml(cut_manifest).truncate(
        max_duration=max_duration,
        offset_type=offset_type,
        preserve_id=preserve_id,
        # The CLI-facing name differs from the keyword expected by CutSet.truncate.
        keep_excessive_supervisions=keep_overflowing_supervisions,
    )
    truncated.to_yaml(output_cut_manifest)
Example #7
0
def libri_cut_set():
    """Load and return the CutSet stored in the libri fixture manifest."""
    manifest_path = 'test/fixtures/libri/cuts.yml'
    return CutSet.from_yaml(manifest_path)