Ejemplo n.º 1
0
def test_cut_set_subset_cut_ids_preserves_order():
    """Check that ``subset(cut_ids=...)`` returns cuts in the requested ID order.

    The requested IDs are deliberately not in ascending order, so a subset
    that came back sorted by ID would fail the comparison.
    """
    expected_order = ("dummy-cut-0010", "dummy-cut-0171", "dummy-cut-0009")
    all_cuts = DummyManifest(CutSet, begin_id=0, end_id=1000)
    # Unpacking into exactly three names also verifies the subset's size.
    first, second, third = all_cuts.subset(cut_ids=list(expected_order))
    assert (first.id, second.id, third.id) == expected_order
Ejemplo n.º 2
0
def test_cut_set_subset_cut_ids_preserves_order_with_lazy_manifest():
    """Check that ``subset(cut_ids=...)`` keeps the requested order after a file round-trip.

    The manifest is written to a temporary ``.jsonl.gz`` file, re-opened via
    ``from_jsonl_lazy`` and only then subset; the requested IDs are
    deliberately not in ascending order.
    """
    expected_order = ("dummy-cut-0010", "dummy-cut-0171", "dummy-cut-0009")
    eager_cuts = DummyManifest(CutSet, begin_id=0, end_id=1000)
    with NamedTemporaryFile(suffix=".jsonl.gz") as tmp:
        eager_cuts.to_file(tmp.name)
        lazy_cuts = eager_cuts.from_jsonl_lazy(tmp.name)
        # Everything stays inside the with-block: the temporary file is
        # removed on exit, and the re-loaded manifest is backed by it.
        # Unpacking into exactly three names also verifies the subset's size.
        first, second, third = lazy_cuts.subset(cut_ids=list(expected_order))
        assert (first.id, second.id, third.id) == expected_order
Ejemplo n.º 3
0
def test_subset_raises(manifest_type, first, last):
    """Check that ``subset`` rejects the given ``first``/``last`` combination.

    The argument values are supplied from outside this function (presumably
    by a ``pytest.mark.parametrize`` decorator that is not visible here).
    The test passes only if the call raises ``AssertionError``.
    """
    any_set = DummyManifest(manifest_type, begin_id=0, end_id=200)
    with pytest.raises(AssertionError):
        # Only the raised error matters, so the return value is not bound.
        any_set.subset(first=first, last=last)
Ejemplo n.º 4
0
def test_subset_last(manifest_type):
    """Check that ``subset(last=10)`` matches a manifest built from the tail ID range.

    The source manifest is created with ``begin_id=0, end_id=200``; its
    last-ten subset must compare equal to a manifest created with
    ``begin_id=190, end_id=200``.
    """
    full_manifest = DummyManifest(manifest_type, begin_id=0, end_id=200)
    tail = full_manifest.subset(last=10)
    assert tail == DummyManifest(manifest_type, begin_id=190, end_id=200)
Ejemplo n.º 5
0
        BucketingSampler(CUTS, CUTS, max_source_duration=10.0, shuffle=True, drop_last=True, num_buckets=2,
                         sampler_type=CutPairsSampler),
        BucketingSampler(CUTS, CUTS, num_buckets=2, sampler_type=CutPairsSampler),
    ),
    lambda: (
        DynamicBucketingSampler(CUTS, max_duration=10.0, shuffle=True, drop_last=True, num_buckets=2),
        DynamicBucketingSampler(CUTS, max_duration=10.0, num_buckets=2),
    ),
    lambda: (
        DynamicCutSampler(CUTS, max_duration=10.0, shuffle=True, drop_last=True),
        DynamicCutSampler(CUTS, max_duration=10.0),
    ),
    # Differently initialized RoundRobinSampler with the same CUTS
    lambda: (
        RoundRobinSampler(
            SingleCutSampler(CUTS.subset(first=50), max_duration=10.0, shuffle=True, drop_last=True),
            SingleCutSampler(CUTS_MOD.subset(first=50), max_duration=10.0, shuffle=True, drop_last=True),
        ),
        RoundRobinSampler(
            SingleCutSampler(CUTS.subset(first=50)),
            SingleCutSampler(CUTS_MOD.subset(first=50)),
        ),
    ),
]
# fmt: on


@pytest.mark.parametrize("create_samplers", SAMPLERS_TO_TEST)
def test_restore_sampler_state(create_samplers):
    sampler, restored_sampler = create_samplers()
    # Iterate a full epoch through the sampler first to accumulate some sampling diagnostics.
Ejemplo n.º 6
0
        DynamicBucketingSampler(CUTS,
                                max_duration=10.0,
                                shuffle=True,
                                drop_last=True,
                                num_buckets=2),
        DynamicBucketingSampler(CUTS, max_duration=10.0, num_buckets=2),
    ),
    lambda: (
        DynamicCutSampler(
            CUTS, max_duration=10.0, shuffle=True, drop_last=True),
        DynamicCutSampler(CUTS, max_duration=10.0),
    ),
    # Differently initialized RoundRobinSampler with the same CUTS
    lambda: (
        RoundRobinSampler(
            SingleCutSampler(CUTS.subset(first=50),
                             max_duration=10.0,
                             shuffle=True,
                             drop_last=True),
            SingleCutSampler(CUTS_MOD.subset(first=50),
                             max_duration=10.0,
                             shuffle=True,
                             drop_last=True),
        ),
        RoundRobinSampler(
            SingleCutSampler(CUTS.subset(first=50)),
            SingleCutSampler(CUTS_MOD.subset(first=50)),
        ),
    ),
]
# fmt: on