def test_generate_mixes_using_all_examples_plus_random_examples(self):
    """Checks that every example id appears as a key of the generated mixes.

    Two sources each contribute the ids 0..99, and 200 mixes are requested.
    """
    source_names = ('source1', 'source2')
    # Same ordering as listing all of source1's pairs, then all of source2's.
    pairs = [(name, idx) for name in source_names for idx in range(100)]
    mixes_by_exid = create_dataset_lib.generate_mixes(
        val='unused', num_mixes=200, sourceid_to_exids=pairs)
    expected_exids = set(range(100))
    self.assertEqual(expected_exids, mixes_by_exid.keys())
def test_generate_unique_mixes(self):
    """Checks that the mix count is capped by the number of unique mixes."""
    pairs = [
        ('source1', 'a'),
        ('source1', 'b'),
        ('source2', 'c'),
        ('source2', 'd'),
    ]
    mixes_by_exid = create_dataset_lib.generate_mixes(
        val='unused', num_mixes=100, sourceid_to_exids=pairs)
    # Flatten the per-example mix ids into one de-duplicated set.
    unique_mix_ids = set(
        itertools.chain.from_iterable(mixes_by_exid.values()))
    # 100 mixes were requested, but this input only allows 4 unique mixes,
    # so 4 is the number we expect to get back.
    self.assertEqual(4, len(unique_mix_ids))
def test_generate_num_mixes(self):
    """Checks that exactly the requested number of mixes is produced."""
    pairs = [
        ('source1', 'a'),
        ('source1', 'b'),
        ('source1', 'c'),
        ('source2', 'd'),
        ('source2', 'e'),
        ('source2', 'f'),
    ]
    mixes_by_exid = create_dataset_lib.generate_mixes(
        val='unused', num_mixes=4, sourceid_to_exids=pairs)
    # Flatten the per-example mix ids into one de-duplicated set.
    unique_mix_ids = set(
        itertools.chain.from_iterable(mixes_by_exid.values()))
    # More unique mixes would be possible with this input; we should still
    # end up with only the 4 that were requested.
    self.assertEqual(4, len(unique_mix_ids))
def test_unique_mixes_duplicate_sources(self):
    """Checks mix uniqueness when the same example ids recur across sources."""
    pairs = [
        ('source1', 'a'),
        ('source1', 'b'),
        ('source1', 'c'),
        ('source2', 'a'),
        ('source2', 'b'),
        ('source2', 'c'),
        ('source3', 'a'),
        ('source3', 'b'),
        ('source3', 'c'),
    ]
    mixes_by_exid = create_dataset_lib.generate_mixes(
        val='unused', num_mixes=100, sourceid_to_exids=pairs)
    # Flatten the per-example mix ids into one de-duplicated set.
    unique_mix_ids = set(
        itertools.chain.from_iterable(mixes_by_exid.values()))
    # There are only 3 unique example ids and we are requesting mixes of
    # 3 items, so only 1 unique mix is possible.
    self.assertEqual(1, len(unique_mix_ids))