def test_cut_pairs_sampler_len():
    """Check that ``len(sampler)`` matches the number of batches it yields.

    The check runs once on the freshly constructed sampler and then once
    after every ``set_epoch`` call, so each epoch that gets set is also
    the epoch that gets verified.
    """
    # total duration is 55 seconds
    # each second has 100 frames
    cuts = CutSet.from_cuts(
        dummy_cut(idx, duration=float(idx)) for idx in range(1, 11)
    )
    sampler = CutPairsSampler(
        source_cuts=cuts,
        target_cuts=cuts,
        shuffle=True,
        max_source_frames=10 * 100,
        max_target_frames=10 * 100,
    )

    # The sampler must already be consistent before set_epoch() is ever called.
    # NOTE: a comprehension is used on purpose instead of list(sampler);
    # list() would additionally query len(sampler) as a size hint while
    # the iteration is in progress.
    assert len(sampler) == len([batch for batch in sampler])

    for epoch in range(5):
        # Select the epoch *before* checking, so that the assertion below
        # exercises this epoch (and the last epoch set is not left unchecked).
        sampler.set_epoch(epoch)
        assert len(sampler) == len([batch for batch in sampler])
def test_cut_pairs_sampler_order_is_deterministic_given_epoch():
    """Iterating a shuffling sampler twice within one epoch gives equal output."""
    # The dummy cuts have a duration of 1 second each
    cut_set = DummyManifest(CutSet, begin_id=0, end_id=100)

    # The frame limits are far below the size of the whole manifest, so a
    # full epoch is expected to consist of multiple batches.
    # NOTE(review): the original comment says "effective batch size of 10 cuts"
    # (1s == 100 frames, 1000 source frames), but max_target_frames=500 looks
    # like the tighter limit -- confirm the actual batch size.
    sampler = CutPairsSampler(
        source_cuts=cut_set,
        target_cuts=cut_set,
        shuffle=True,
        max_source_frames=1000,
        max_target_frames=500,
    )
    sampler.set_epoch(42)

    # Two full passes with no epoch update in between; comprehensions are
    # kept (rather than list()) so that only iteration touches the sampler.
    first_pass = [item for item in sampler]
    second_pass = [item for item in sampler]

    # calling the sampler twice without epoch update gives identical ordering
    assert first_pass == second_pass
def test_cut_pairs_sampler_order_differs_between_epochs():
    """Each new epoch must produce an ordering different from the previous one."""
    # The dummy cuts have a duration of 1 second each
    cut_set = DummyManifest(CutSet, begin_id=0, end_id=100)

    # The frame limits are far below the size of the whole manifest, so a
    # full epoch is expected to consist of multiple batches.
    # NOTE(review): the original comment says "effective batch size of 10 cuts"
    # (1s == 100 frames, 1000 source frames), but max_target_frames=500 looks
    # like the tighter limit -- confirm the actual batch size.
    sampler = CutPairsSampler(
        source_cuts=cut_set,
        target_cuts=cut_set,
        shuffle=True,
        max_source_frames=1000,
        max_target_frames=500,
    )

    # Baseline: the ordering produced before any set_epoch() call.
    previous_order = [item for item in sampler]

    for epoch in range(1, 6):
        sampler.set_epoch(epoch)
        current_order = [item for item in sampler]
        # Only consecutive epochs are compared, not every pair of epochs.
        assert current_order != previous_order
        previous_order = current_order