Beispiel #1
0
def test_serialize_padded_cut_set(cut_set):
    """Check that a padded CutSet compares equal after a JSON write/read round trip."""
    # The `cut_set` fixture is defined in test/cut/conftest.py.
    expected = cut_set.pad(60.1)
    with NamedTemporaryFile() as tmp_file:
        manifest_path = tmp_file.name
        expected.to_json(manifest_path)
        # Read back while the temporary file still exists.
        actual = CutSet.from_json(manifest_path)
    assert expected == actual
Beispiel #2
0
def mixed_feature_cut() -> MixedCut:
    """
    Return the cut stored under 'mixed-cut-id' in the overlayed-cut fixture manifest.

    The cut's frame count and duration are asserted before it is returned.
    """
    manifest_path = 'test/fixtures/mix_cut_test/overlayed_cut_manifest.json'
    cut = CutSet.from_json(manifest_path)['mixed-cut-id']
    assert cut.num_frames == 1360
    assert isclose(cut.duration, 13.595)
    return cut
Beispiel #3
0
def mixed_audio_cut() -> MixedCut:
    """
    Return the cut stored under "mixed-cut-id" in the overlayed-audio fixture manifest.

    The cut's duration is asserted before it is returned.
    """
    manifest_path = "test/fixtures/mix_cut_test/overlayed_audio_cut_manifest.json"
    cut = CutSet.from_json(manifest_path)["mixed-cut-id"]
    assert isclose(cut.duration, 14.4)
    return cut
Beispiel #4
0
def test_cut_into_windows():
    """Check the start times and durations of the windows cut from the ljspeech fixture."""
    # The fixture has 2 cuts of 1.54s and 1.6s.
    source_cuts = CutSet.from_json("test/fixtures/ljspeech/cuts.json")
    # With duration=0.5 and hop=0.4 the windows start at 0, 0.4, 0.8, 1.2 in each cut.
    windows = source_cuts.cut_into_windows(duration=0.5, hop=0.4)

    expected_starts = [0, 0.4, 0.8, 1.2, 0, 0.4, 0.8, 1.2]
    # The last window of each cut is shorter than the requested duration.
    expected_durations = [
        0.5, 0.5, 0.5, 0.3396371882,
        0.5, 0.5, 0.5, 0.39768707483,
    ]

    assert [window.start for window in windows] == approx(expected_starts)
    assert [window.duration for window in windows] == approx(expected_durations)
Beispiel #5
0
def test_store_audio(cut_set):
    """Check that cuts returned by ``compute_and_store_recordings`` load the same audio."""
    # NOTE: the `cut_set` argument is replaced right away by the libri fixture manifest.
    cut_set = CutSet.from_json('test/fixtures/libri/cuts.json')
    with TemporaryDirectory() as storage_dir:
        stored = cut_set.compute_and_store_recordings(storage_dir)
        for original_cut, stored_cut in zip(cut_set, stored):
            original_samples = original_cut.load_audio()
            stored_samples = stored_cut.load_audio()
            assert np.array_equal(original_samples, stored_samples)
        # zip() stops at the shorter input, so the sizes are compared explicitly.
        assert len(stored) == len(cut_set)
Beispiel #6
0
def pad(cut_manifest: Pathlike, output_cut_manifest: Pathlike,
        duration: Optional[float]):
    """
    Pad the cuts read from CUT_MANIFEST and write the resulting CutSet to OUTPUT_CUT_MANIFEST.
    The cuts are right-padded, i.e. the padding is placed after the signal ends.
    """
    source_cuts = CutSet.from_json(cut_manifest)
    source_cuts.pad(desired_duration=duration).to_json(output_cut_manifest)
Beispiel #7
0
def test_mixed_cut_set_serialization(cut_set_with_mixed_cut, format, compressed):
    """
    Check that a CutSet compares equal after being written to and read back from a file.

    :param cut_set_with_mixed_cut: the CutSet under test.
    :param format: serialization format, either 'yaml' or 'json'.
    :param compressed: when truthy, the temporary file name gets a '.gz' suffix.
    :raises ValueError: when ``format`` is neither 'yaml' nor 'json'.
    """
    with NamedTemporaryFile(suffix='.gz' if compressed else '') as f:
        if format == 'yaml':
            cut_set_with_mixed_cut.to_yaml(f.name)
            restored = CutSet.from_yaml(f.name)
        elif format == 'json':
            cut_set_with_mixed_cut.to_json(f.name)
            restored = CutSet.from_json(f.name)
        else:
            # Without this branch `restored` would stay unbound and the final
            # assert would fail with a confusing UnboundLocalError.
            raise ValueError(f'Unsupported serialization format: {format!r}')
    assert cut_set_with_mixed_cut == restored
Beispiel #8
0
def test_mixed_cut_load_features():
    """Check that the mixed cut's loaded features have as many rows as its frame count."""
    num_frames = 1360
    manifest_path = 'test/fixtures/mix_cut_test/overlayed_cut_manifest.json'
    cut = CutSet.from_json(manifest_path)['mixed-cut-id']

    # Sanity-check the manifest metadata before loading any features.
    assert cut.num_frames == num_frames
    assert isclose(cut.duration, 13.595)

    features = cut.load_features()
    assert features.shape[0] == num_frames
Beispiel #9
0
def test_compute_cmvn_stats():
    """Check the shapes of the computed feature stats and that the pickled copy matches them."""
    cuts = CutSet.from_json('test/fixtures/libri/cuts.json')
    with NamedTemporaryFile() as stats_file:
        computed = cuts.compute_global_feature_stats(storage_path=stats_file.name)
        stats_file.flush()
        # Read the stats back through the still-open temporary file handle.
        loaded = pickle.load(stats_file)

    stat_keys = ('norm_means', 'norm_stds')
    # Each statistic is expected to be a vector with one entry per feature.
    for key in stat_keys:
        assert computed[key].shape == (cuts[0].num_features, )
    # The stored stats must be element-wise equal to the returned ones.
    for key in stat_keys:
        assert (computed[key] == loaded[key]).all()
Beispiel #10
0
def mix_sequential(cut_manifests: List[Pathlike], output_cut_manifest: Pathlike):
    """
    Iterate jointly over CUT_MANIFESTS, mix the Cuts found on the same positions and store the
    resulting CutSet in OUTPUT_CUT_MANIFEST. E.g. the first output cut is created from the first
    cuts in each input manifest.
    The mix is performed by summing the features from all Cuts.
    If the CUT_MANIFESTS have different number of Cuts, the mixing ends when the shorter manifest
    is depleted.
    """
    cut_sets = [CutSet.from_json(manifest_path) for manifest_path in cut_manifests]
    # zip() stops at the shortest CutSet, which gives the "shorter manifest" rule above.
    mixed_cuts = (mix_cuts(aligned_cuts) for aligned_cuts in zip(*cut_sets))
    CutSet.from_cuts(mixed_cuts).to_file(output_cut_manifest)
Beispiel #11
0
def mix_by_recording_id(cut_manifests: List[Pathlike],
                        output_cut_manifest: Pathlike):
    """
    Match the Cuts from CUT_MANIFESTS by their recording IDs, mix each matched group together
    and store the resulting CutSet in OUTPUT_CUT_MANIFEST.
    """
    cut_sets = [CutSet.from_json(manifest_path) for manifest_path in cut_manifests]
    cuts_by_recording = groupby(lambda cut: cut.recording_id, combine(*cut_sets))
    # Only the grouped cuts are needed for mixing; the recording IDs themselves are not used.
    mixed_cuts = (mix_cuts(group) for group in cuts_by_recording.values())
    CutSet.from_cuts(mixed_cuts).to_json(output_cut_manifest)
Beispiel #12
0
def append(
        cut_manifests: List[Pathlike],
        output_cut_manifest: Pathlike,
):
    """
    Create a new CutSet by appending the cuts in CUT_MANIFESTS. CUT_MANIFESTS are iterated
    position-wise (the cuts on the i'th position in each manifest are appended to each other).
    The cuts are appended in the order in which they appear in the input argument list.
    If CUT_MANIFESTS have different lengths, the script stops once the shortest CutSet is depleted.
    """
    loaded_sets = [CutSet.from_json(manifest_path) for manifest_path in cut_manifests]
    # zip() ends with the shortest CutSet, which gives the stopping rule described above.
    joined_cuts = (append_cuts(aligned_cuts) for aligned_cuts in zip(*loaded_sets))
    CutSet.from_cuts(joined_cuts).to_json(output_cut_manifest)
Beispiel #13
0
def truncate(
    cut_manifest: Pathlike,
    output_cut_manifest: Pathlike,
    preserve_id: bool,
    max_duration: float,
    offset_type: str,
    keep_overflowing_supervisions: bool,
):
    """
    Read the cuts from CUT_MANIFEST, truncate them and write the result to OUTPUT_CUT_MANIFEST.
    Cuts shorter than MAX_DURATION will not be modified.
    """
    # The option is named `keep_excessive_supervisions` on the CutSet side.
    truncate_options = dict(
        max_duration=max_duration,
        offset_type=offset_type,
        keep_excessive_supervisions=keep_overflowing_supervisions,
        preserve_id=preserve_id,
    )
    source_cuts = CutSet.from_json(cut_manifest)
    source_cuts.truncate(**truncate_options).to_json(output_cut_manifest)
def libri_cut_set():
    """Return the CutSet read from the libri cuts fixture manifest."""
    manifest_path = "test/fixtures/libri/cuts.json"
    return CutSet.from_json(manifest_path)
Beispiel #15
0
def test_load_none_recording_cut_set():
    """Check that the first cut of the no-recording fixture has features but no recording."""
    manifest_path = 'test/fixtures/libri/cuts_no_recording.json'
    loaded_cuts = list(CutSet.from_json(manifest_path).cuts.values())
    first_cut = loaded_cuts[0]
    assert first_cut.recording is None
    assert first_cut.features is not None
Beispiel #16
0
def cut_set():
    """Return the CutSet read from the ami cuts fixture manifest."""
    manifest_path = 'test/fixtures/ami/cuts.json'
    return CutSet.from_json(manifest_path)
Beispiel #17
0
def libri_cut_set():
    """Return the CutSet read from the ljspeech cuts fixture manifest."""
    # NOTE(review): the function name says "libri" but the path points at the
    # ljspeech fixture - confirm which one is intended.
    manifest_path = 'test/fixtures/ljspeech/cuts.json'
    return CutSet.from_json(manifest_path)
Beispiel #18
0
def mixed_audio_cut():
    """
    Return the cut stored under 'mixed-cut-id' in the overlayed-audio fixture manifest.

    The cut's duration is asserted before it is returned.
    """
    manifest_path = 'test/fixtures/mix_cut_test/overlayed_audio_cut_manifest.json'
    cut = CutSet.from_json(manifest_path)['mixed-cut-id']
    assert isclose(cut.duration, 14.4)
    return cut