def test_working_set_hits(self):
    """Check WorkingSetKey draws against the SequentialKey and HotKey key sets.

    Every key returned by ``WorkingSetKey.next`` must be one of the key
    strings yielded by ``SequentialKey`` across all workers. A key whose
    ``hit`` flag is set must additionally be one of the key strings yielded
    by ``HotKey``; a key whose ``hit`` flag is not set must not be.
    """
    ws = WorkloadSettings(items=10 ** 3, workers=40, working_set=20,
                          working_set_access=100, working_set_moving_docs=0,
                          key_fmtr='hex')

    # Keep both collections as sets: they are probed 10 ** 5 times below,
    # and a sorted list would turn every probe into a linear scan.
    keys = {
        key.string
        for worker in range(ws.workers)
        for key in docgen.SequentialKey(sid=worker, ws=ws, prefix='test')
    }
    hot_keys = {
        key.string
        for worker in range(ws.workers)
        for key in docgen.HotKey(sid=worker, ws=ws, prefix='test')
    }

    wsk = docgen.WorkingSetKey(ws=ws, prefix='test')
    for _ in range(10 ** 5):
        key = wsk.next(curr_items=ws.items, curr_deletes=100)
        self.assertIn(key.string, keys)
        if key.hit:
            self.assertIn(key.string, hot_keys)
        else:
            self.assertNotIn(key.string, hot_keys)
def test_new_working_set_hits(self):
    """Check how many original hot keys are still hit after the item count grows.

    The hot-key set is collected from ``HotKey`` for the configured number
    of items. ``WorkingSetKey.next`` is then called with ``curr_items``
    raised by ``new_items``, and every key flagged ``hit`` is recorded.
    The test expects the number of recorded hits that are also original hot
    keys to be ``ws.items * ws.working_set // 100 - new_items``.
    """
    ws = WorkloadSettings(items=10 ** 3, workers=40, working_set=20,
                          working_set_access=100, working_set_moving_docs=0,
                          key_fmtr='hex')

    # A set is all that is needed here: the only use is the intersection
    # with the observed hits further down.
    hot_keys = {
        key.string
        for worker in range(ws.workers)
        for key in docgen.HotKey(sid=worker, ws=ws, prefix='test')
    }

    wsk = docgen.WorkingSetKey(ws=ws, prefix='test')
    new_items = 10
    hits = set()
    for _ in range(10 ** 5):
        key = wsk.next(curr_items=ws.items + new_items, curr_deletes=100)
        if key.hit:
            hits.add(key.string)

    overlap = hot_keys & hits
    # Floor division keeps the expected count exact instead of comparing an
    # integer length against a floating-point product.
    expected_overlap = ws.items * ws.working_set // 100 - new_items
    self.assertEqual(len(overlap), expected_overlap)
def test_hot_keys(self):
    """Check uniqueness and counts of SequentialKey and HotKey key strings.

    Across all workers, ``SequentialKey`` must never yield the same key
    string twice and must yield ``ws.items`` strings in total. ``HotKey``
    must likewise yield no duplicates, only strings already produced by
    ``SequentialKey``, and ``ws.working_set * ws.items // 100`` of them.
    """
    settings = WorkloadSettings(items=10 ** 4, workers=40, working_set=10,
                                working_set_access=100,
                                working_set_moving_docs=0,
                                key_fmtr='decimal')

    def key_strings(generator_cls):
        # Lazily yield the key strings of every worker, in worker order.
        for sid in range(settings.workers):
            for generated in generator_cls(sid=sid, ws=settings, prefix='test'):
                yield generated.string

    all_keys = set()
    for name in key_strings(docgen.SequentialKey):
        self.assertNotIn(name, all_keys)
        all_keys.add(name)
    self.assertEqual(len(all_keys), settings.items)

    hot = set()
    for name in key_strings(docgen.HotKey):
        self.assertNotIn(name, hot)
        self.assertIn(name, all_keys)
        hot.add(name)
    expected_hot = settings.working_set * settings.items // 100
    self.assertEqual(len(hot), expected_hot)
def test_zipf_generator_cache_miss(self):
    """Check that ZipfKey draws land in the HotKey set at the configured rate.

    Draws ``10 ** 5`` keys from ``ZipfKey`` and counts those whose string is
    not among the strings yielded by ``HotKey``. The resulting hit rate, as a
    percentage, must be within 0.5 of ``ws.working_set_access``.
    """
    settings = WorkloadSettings(items=10 ** 5, workers=40, working_set=1.6,
                                working_set_access=90,
                                working_set_moving_docs=0, key_fmtr='hex')
    total_ops = 10 ** 5

    hot = {
        generated.string
        for sid in range(settings.workers)
        for generated in docgen.HotKey(sid=sid, ws=settings, prefix='test')
    }

    zipf = docgen.ZipfKey(prefix='test', fmtr=settings.key_fmtr, alpha=1.23)
    # One draw per operation; count the draws that fall outside the hot set.
    miss_count = sum(
        1
        for _ in range(total_ops)
        if zipf.next(curr_deletes=100,
                     curr_items=settings.items).string not in hot
    )

    observed_hit_rate = 100 * (1 - miss_count / total_ops)
    self.assertAlmostEqual(observed_hit_rate, settings.working_set_access,
                           delta=0.5)
def test_hot_keys(self):
    """Check uniqueness and counts of UnorderedKey and HotKey keys.

    Across all workers, ``UnorderedKey`` must never yield the same key twice
    and must yield ``ws.items`` keys in total. ``HotKey`` must likewise yield
    no duplicates, only keys already produced by ``UnorderedKey``, and
    ``ws.working_set * ws.items // 100`` of them. Keys are compared as
    yielded, not through a ``string`` attribute.
    """
    # NOTE(review): another ``test_hot_keys`` is defined earlier in this
    # file; if both live in the same class this definition shadows the
    # earlier one, which would then never run -- confirm.
    settings = WorkloadSettings(items=10**4, workers=40, working_set=10,
                                working_set_access=100,
                                working_set_moving_docs=0)

    all_keys = set()
    for sid in range(settings.workers):
        generator = docgen.UnorderedKey(sid=sid, ws=settings, prefix='test')
        for candidate in generator:
            self.assertNotIn(candidate, all_keys)
            all_keys.add(candidate)
    self.assertEqual(len(all_keys), settings.items)

    hot = set()
    for sid in range(settings.workers):
        generator = docgen.HotKey(sid=sid, ws=settings, prefix='test')
        for candidate in generator:
            self.assertNotIn(candidate, hot)
            self.assertIn(candidate, all_keys)
            hot.add(candidate)
    expected_hot = settings.working_set * settings.items // 100
    self.assertEqual(len(hot), expected_hot)