def test_delete_prune_length_iteration_test(self):
    """
    Checks len(), items() and cardinality() of an 8-bucket table across a
    delete that is followed by an overflow-triggered prune.
    """
    self.ht = HashTable(buckets=8)
    table = self.ht

    table.update("bbboss")
    self.assertEqual(len(table), 3)
    del table['s']
    self.assertEqual(len(table), 2)

    table.update("122333")
    self.assertEqual(len(table), 5)
    # Iteration should yield exactly these 5 elements.
    before_overflow = {'b': 3, 'o': 1, '1': 1, '2': 2, '3': 3}
    self.assertEqual(set(table.items()), set(before_overflow.items()))

    # The next add will overflow because the deleted bucket is still
    # physically there, so the 'real' size is 5.
    table.update("!")
    # The overflow removes '1', 'o', and the empty bucket.
    self.assertEqual(len(table), 4)
    # Iteration should now yield exactly these 4 elements.
    after_overflow = {'b': 3, '!': 1, '2': 2, '3': 3}
    self.assertEqual(set(table.items()), set(after_overflow.items()))
    self.assertEqual(table.cardinality(), 7)
def test_itertype_use_unicode_false(self):
    """
    With use_unicode=False, keys produced by iterating the table and by
    items() must all be of type bytes, whatever string type was inserted.
    """
    self.ht = HashTable(buckets=64, use_unicode=False)
    mixed_keys = [b'bytes', u'Unicode', 'String']
    self.ht.update(mixed_keys)

    for iterated_key in self.ht:
        self.assertEqual(type(iterated_key), bytes)

    for item_key, _count in self.ht.items():
        self.assertEqual(type(item_key), bytes)
def test_pickle_large(self):
    """
    Round-trips a table with 2 ** 25 buckets through store_and_load and
    compares the result with the original via check_hashtable.
    """
    self.ht = HashTable(buckets=2 ** 25)
    for text in ("boss", "pickling", "verylargetable"):
        self.ht.update(text)

    restored = self.store_and_load()
    self.check_hashtable(restored)
def test_increment_by_number_greater_than_long_long_max(self):
    """
    Negative test: incrementing by long_long_max + 1 raises OverflowError
    and leaves the counter at 0.
    """
    table = HashTable(buckets=64)
    too_large = long_long_max + 1

    with self.assertRaises(OverflowError):
        table.increment('toomuch', too_large)

    self.assertEqual(table['toomuch'], 0, 'Should be unaffected')
class HashTableIterTypeTest(unittest.TestCase):
    """
    Functional tests for HashTable use_unicode parameter
    """

    # Keys of three different string flavours inserted by every test.
    MIXED_KEYS = [b'bytes', u'Unicode', 'String']

    def setUp(self):
        self.ht = HashTable(buckets=64)

    def _assert_key_type(self, expected_type):
        """Assert that iteration and items() both yield keys of exactly expected_type."""
        for iterated_key in self.ht:
            self.assertEqual(type(iterated_key), expected_type)
        for item_key, _count in self.ht.items():
            self.assertEqual(type(item_key), expected_type)

    def test_itertype_default_unicode(self):
        """Without use_unicode, keys come back as uni_type."""
        self.ht.update(list(self.MIXED_KEYS))
        self._assert_key_type(uni_type)

    def test_itertype_use_unicode_true(self):
        """With use_unicode=True, keys come back as uni_type."""
        self.ht = HashTable(buckets=64, use_unicode=True)
        self.ht.update(list(self.MIXED_KEYS))
        self._assert_key_type(uni_type)

    def test_itertype_use_unicode_false(self):
        """With use_unicode=False, keys come back as bytes."""
        self.ht = HashTable(buckets=64, use_unicode=False)
        self.ht.update(list(self.MIXED_KEYS))
        self._assert_key_type(bytes)
def test_update_with_hashtable(self):
    """
    Update with another HashTable and test the merged counts using set
    representation
    """
    first_counts = {'a': 1, 'b': 3, 'c': 2, 'd': 5}
    second_counts = {'a': 18, 'b': 4, 'c': 6, 'e': 13}
    merged_counts = {'a': 19, 'b': 7, 'c': 8, 'd': 5, 'e': 13}

    self.ht.update(first_counts)

    other_table = HashTable(64)
    other_table.update(second_counts)
    self.ht.update(other_table)

    actual_items = set(self.ht.items())
    self.assertEqual(actual_items, set(merged_counts.items()))
def test_invalid_default_init(self):
    """
    Negative test for initialization with a non-numeric bucket count:
    the constructor is expected to raise TypeError.
    """
    for invalid_bucket_count in ["foo", dict()]:
        # msg is shown only when no TypeError is raised, so it must name
        # the exception this test actually expects.
        with self.assertRaises(
                TypeError,
                msg="Constructor should throw TypeError for non-numeric arguments"):
            HashTable(buckets=invalid_bucket_count)
def test_invalid_buckets_init(self):
    """
    Negative test: each of the listed bucket counts must be rejected by
    the constructor with ValueError.
    """
    rejected_counts = [1, 2, 3, -3, 2 ** 32]
    for invalid_bucket_count in rejected_counts:
        failure_message = "Constructor should throw ValueError for count %d" % invalid_bucket_count
        with self.assertRaises(ValueError, msg=failure_message):
            HashTable(buckets=invalid_bucket_count)
def test_increment_negative(self):
    """
    Negative test: a negative increment raises ValueError both for a key
    that is not stored yet and for one that already has a count.
    """
    table = HashTable(buckets=64)
    table.increment('foo', 3)

    # Key that has never been stored.
    with self.assertRaises(ValueError):
        table.increment('bar', -1)

    # Key that already holds a count.
    with self.assertRaises(ValueError):
        table.increment('foo', -2)
def test_buckets_init(self):
    """
    Test that a table built with buckets=N reports the greatest power of
    2 that does not exceed N as its bucket count.
    """
    # (requested buckets, expected buckets()), checked in this order.
    cases = [
        (4, 4),
        (5, 4),
        (16, 16),
        (17, 16),
        (31, 16),
        (2 ** 16 - 1, 2 ** 15),
        (2 ** 16, 2 ** 16),
        (2 ** 24 - 1, 2 ** 23),
    ]
    for requested, expected in cases:
        self.assertEqual(HashTable(buckets=requested).buckets(), expected)
def test_quality_default(self):
    """
    Uses the default structure

    Checks quality() on a 1024-bucket table: 0 when empty, 2/3 after 512
    distinct keys, and approximately 2.0 after 1024 more distinct keys.
    """
    ht = HashTable(buckets=1024)
    self.assertEqual(ht.quality(), 0)

    for i in range(512):
        ht.increment(str(i), 1 + (i % 13))
    self.assertAlmostEqual(ht.quality(), 2.0 / 3)

    # The second batch uses keys '1024'..'2047', disjoint from the first.
    for i in range(1024):
        ht.increment(str(1024 + i), 1 + (i % 17))
    self.assertAlmostEqual(ht.quality(), 2.0, delta=0.015)
def test_increment_by_big_number(self):
    """
    Increments a new key by 68728041949, then by 1, and checks the
    stored count after each step.
    """
    table = HashTable(buckets=64)
    key = 'big number'
    big_number = 68728041949

    table.increment(key, big_number)
    self.assertEqual(table[key], big_number)

    table.increment(key, 1)
    self.assertEqual(table[key], big_number + 1)
def test_default_init(self):
    """
    Test the bucket count chosen when the table is built from a single
    positional argument.

    NOTE(review): the positional argument is presumably the size in MB
    (another test passes size_mb by keyword) -- confirm against the
    HashTable constructor.
    """
    # (positional argument, expected buckets()), checked in this order.
    cases = [
        (1, 2 ** 15),
        (2, 2 ** 16),
        (3, 2 ** 16),
        (15, 2 ** 18),
        (16, 2 ** 19),
        (512, 2 ** 24),
    ]
    for argument, expected in cases:
        self.assertEqual(HashTable(argument).buckets(), expected)
def test_increment_by_zero(self):
    """
    Tests that incrementing by zero leaves counters unchanged, both for
    a key that is absent and for one that already has a value.
    """
    table = HashTable(buckets=64)
    table['bar'] = 2

    # Absent key: still reads as 0 afterwards.
    table.increment('foo', 0)
    self.assertEqual(table['foo'], 0)

    # Present key: keeps its value.
    table.increment('bar', 0)
    self.assertEqual(table['bar'], 2)
def test_increment_overflow(self):
    """
    Negative test for overflowing on max counter value (long long max)

    Covers two cases: a counter already at long_long_max incremented by
    1, and a counter at roughly 3/4 of long_long_max incremented by the
    same amount again.
    """
    ht = HashTable(buckets=64)
    ht['max'] = long_long_max
    with self.assertRaises(OverflowError):
        ht.increment('max', 1)

    three_quarters_of_long_long_max = int(long_long_max * 3 / 4)
    ht['foo'] = three_quarters_of_long_long_max
    # Increment 'foo', the counter that was just set. The previous code
    # incremented 'max' again, which only repeated the first check and
    # left 'foo' unused.
    with self.assertRaises(OverflowError):
        ht.increment('foo', three_quarters_of_long_long_max)
class HashTablePickleTest(unittest.TestCase):
    """
    Functional tests for pickling a HashTable to a file and loading it
    back, comparing the reloaded table with the original.
    """

    def setUp(self):
        self.ht = HashTable(buckets=64)

    def tearDown(self):
        # Remove the pickle file if a test left one behind.
        if os.path.isfile(filename):
            os.remove(filename)

    def check_hashtable(self, reloaded):
        """Assert that reloaded matches self.ht in size, totals, items and quality."""
        original = self.ht
        self.assertEqual(len(reloaded), len(original))
        self.assertEqual(reloaded.buckets(), original.buckets())
        self.assertEqual(reloaded.total(), original.total())
        self.assertEqual(set(reloaded.items()), set(original.items()))
        self.assertEqual(reloaded.quality(), original.quality())
        self.assertEqual(reloaded.cardinality(), original.cardinality())

    def store_and_load(self):
        """Pickle self.ht into filename, then unpickle and return the copy."""
        with open(filename, 'wb') as sink:
            pickle.dump(self.ht, sink)
        with open(filename, 'rb') as source:
            return pickle.load(source)

    def test_pickle_empty(self):
        """Round-trips the empty table created in setUp."""
        self.check_hashtable(self.store_and_load())

    def test_pickle_simple(self):
        """Round-trips a table populated by two string updates."""
        for text in ("boss", "pickling"):
            self.ht.update(text)
        self.check_hashtable(self.store_and_load())

    def test_pickle_deleted(self):
        """Round-trips a table from which two keys were deleted."""
        for text in ("boss", "pickling"):
            self.ht.update(text)
        del self.ht['g']
        del self.ht['s']
        self.check_hashtable(self.store_and_load())

    def test_pickle_pruned(self):
        """Round-trips a 64-bucket table after 120 distinct keys were incremented."""
        for index in range(120):
            self.ht.increment(str(index), 1 + ((index * 27) % 17))
        self.check_hashtable(self.store_and_load())

    def test_pickle_large(self):
        """Round-trips a table created with 2 ** 25 buckets."""
        self.ht = HashTable(buckets=2 ** 25)
        for text in ("boss", "pickling", "verylargetable"):
            self.ht.update(text)
        self.check_hashtable(self.store_and_load())
class HashTableUpdateTest(unittest.TestCase):
    """
    Functional tests for HashTable.update method, which adds another counter, dictionary, hashtable, tuple or list
    """

    def setUp(self):
        self.ht = HashTable(buckets=64)

    def test_update_numbers(self):
        """
        Negative test: calling update using numeric values as parameter yields TypeError
        """
        for number in (1, 1.0):
            with self.assertRaises(TypeError):
                self.ht.update(number)

    def test_update_string(self):
        """Updating with a string counts its individual characters."""
        self.ht.update("foo")
        self.assertEqual(self.ht['f'], 1)
        self.assertEqual(self.ht['o'], 2)

    def test_update_tuple(self):
        """Updating with a tuple counts each element."""
        words = ('foo', 'bar', 'foo')
        self.ht.update(words)
        self.assertEqual(self.ht['foo'], 2)
        self.assertEqual(self.ht['bar'], 1)

    def test_update_list(self):
        """Updating with a list counts each element."""
        digits = [str(index % 3) for index in range(5)]
        self.ht.update(digits)
        self.assertEqual(self.ht['0'], 2)
        self.assertEqual(self.ht['1'], 2)
        self.assertEqual(self.ht['2'], 1)

    def test_update_split(self):
        """Updating with split words counts them case-sensitively."""
        words = "This is a sentence".split()
        self.ht.update(words)
        self.assertEqual(self.ht['is'], 1)
        # The lowercase form was never inserted.
        self.assertEqual(self.ht['this'], 0)

    def test_update_twice(self):
        """Two updates with the same elements accumulate the counts."""
        words = ('foo', 'bar', 'foo')
        self.ht.update(words)
        self.ht.update(('foo', 'bar', 'foo'))
        self.assertEqual(self.ht['foo'], 4)
        self.assertEqual(self.ht['bar'], 2)

    def test_update_bytes(self):
        """A bytes key and the equal str key share one counter."""
        words = ('foo', 'bar', b'foo')
        self.ht.update(words)
        self.assertEqual(self.ht['foo'], 2)
        self.assertEqual(self.ht[b'foo'], 2)

    def test_update_unicode(self):
        """A unicode key and the equal str key share one counter."""
        words = ('foo', 'bar', u'foo')
        self.ht.update(words)
        self.assertEqual(self.ht['foo'], 2)
        self.assertEqual(self.ht[u'foo'], 2)

    def test_update_with_dictionary(self):
        """
        Update with a dictionary and test against it using set representation
        """
        counts = {'a': 1, 'b': 3, 'c': 2, 'd': 5}
        self.ht.update(counts)
        self.assertEqual(self.ht['b'], 3)
        self.assertEqual(set(self.ht.items()), set(counts.items()))

    def test_update_with_hashtable(self):
        """
        Update with another HashTable and test the merged counts using set representation
        """
        first_counts = {'a': 1, 'b': 3, 'c': 2, 'd': 5}
        second_counts = {'a': 18, 'b': 4, 'c': 6, 'e': 13}
        merged_counts = {'a': 19, 'b': 7, 'c': 8, 'd': 5, 'e': 13}

        self.ht.update(first_counts)
        other_table = HashTable(64)
        other_table.update(second_counts)
        self.ht.update(other_table)

        self.assertEqual(set(self.ht.items()), set(merged_counts.items()))
def test_prune_total(self):
    """
    Checks that total() of a 4-bucket table is 9 after nine characters
    are added and 10 after one more character is added.
    """
    self.ht = HashTable(buckets=4)
    table = self.ht

    table.update("223334444")
    self.assertEqual(table.total(), 9)

    table.update("1")
    self.assertEqual(table.total(), 10)
class HashTableTotalTest(unittest.TestCase):
    """
    Functional tests for HashTable.total method
    """

    def setUp(self):
        self.ht = HashTable(buckets=64)

    def test_simple_total(self):
        """total() is 0 for a fresh table and 3 after a 3-character update."""
        table = self.ht
        self.assertEqual(table.total(), 0)
        table.update("foo")
        self.assertEqual(table.total(), 3)

    def test_set_reset_total(self):
        """total() follows in-place additions and direct assignments."""
        table = self.ht
        table.update("foo")
        self.assertEqual(table.total(), 3)

        table['o'] += 2
        self.assertEqual(table.total(), 5)

        table['f'] = 0
        self.assertEqual(table.total(), 4)

    def test_increment_total(self):
        """total() follows increments of existing keys, new keys and zero increments."""
        table = self.ht
        table.update("foo")

        table.increment("f", 5)
        self.assertEqual(table.total(), 8)

        table.increment("a", 2)
        self.assertEqual(table.total(), 10)

        table.increment("r", 0)
        self.assertEqual(table.total(), 10)

    def test_delete_total(self):
        """Deleting a key subtracts its count from total()."""
        table = self.ht
        table.update("foo")
        del table['o']
        self.assertEqual(table.total(), 1)

    def test_prune_total(self):
        """total() of a 4-bucket table is 9, then 10 after one more character."""
        self.ht = HashTable(buckets=4)
        table = self.ht

        table.update("223334444")
        self.assertEqual(table.total(), 9)

        table.update("1")
        self.assertEqual(table.total(), 10)
def test_prune_total(self):
    """
    Checks that total() of an 8-bucket table is 26 after a dictionary
    update (counts summing to 21) followed by a 5-character string update.
    """
    initial_counts = {'a': 3, 'b': 2, 'c': 4, 'd': 1, 'e': 5, 'f': 6}

    table = HashTable(buckets=8)
    table.update(initial_counts)
    table.update("xbbbd")

    self.assertEqual(table.total(), 26)
def test_prune_with_eight_buckets(self):
    """
    Tests prune with table of 8 buckets, which stores at most 6 elements
    and prunes itself down to 4 or less.
    """
    table = HashTable(buckets=8)

    # Fill the table with 6 elements.
    initial_counts = {'a': 3, 'b': 2, 'c': 4, 'd': 1, 'e': 5, 'f': 6}
    table.update(initial_counts)
    expected = {'d': 1, 'b': 2, 'a': 3, 'c': 4, 'e': 5, 'f': 6}
    self.assertEqual(set(table.items()), set(expected.items()))
    self.assertEqual(len(table), 6)

    # Evicts 2 elements (so that half of the table is free) before adding x
    table.update(['x'])
    expected = {'x': 1, 'a': 3, 'c': 4, 'e': 5, 'f': 6}
    self.assertEqual(set(table.items()), set(expected.items()))
    self.assertEqual(len(table), 5)

    # Re-adding 'b' three times brings the table back to 6 elements.
    table.update(['b', 'b', 'b'])
    expected = {'x': 1, 'b': 3, 'a': 3, 'c': 4, 'e': 5, 'f': 6}
    self.assertEqual(set(table.items()), set(expected.items()))

    # Evicts 3 elements because 'a' and 'b' share the count which needs
    # to be included in the limit
    table['d'] += 1
    expected = {'d': 1, 'c': 4, 'e': 5, 'f': 6}
    self.assertEqual(set(table.items()), set(expected.items()))
def test_prune_with_four_buckets(self):
    """
    Tests prune with a stupid table of 4 buckets, this can store at most
    3 elements at once.
    """
    ht = HashTable(buckets=4)

    # Adds elements one by one. d replaces 'e', then 'e' replaces 'b'
    ht.update({'e': 1, 'a': 3, 'b': 2})
    ht.update({'d': 5})
    ht.update({'e': 4})
    self.assertEqual(set(ht.items()), set({'a': 3, 'd': 5, 'e': 4}.items()))
    self.assertEqual(len(ht), 3, "The number of elements after pruning should be 3")

    # Increment existing 'a', then add 'b' which evicts 2 elements with
    # the lowest value of 4
    ht.increment('a')
    self.assertEqual(len(ht), 3)
    self.assertEqual(set(ht.items()), set({'a': 4, 'd': 5, 'e': 4}.items()))

    ht.increment('b')
    # The message must state the count actually asserted (2); it used to
    # say 3, which would mislead whoever reads a failure.
    self.assertEqual(len(ht), 2, "The number of elements after pruning should be 2")
    self.assertEqual(set(ht.items()), set({'b': 1, 'd': 5}.items()))
def test_prune_cardinality(self):
    """
    Checks that cardinality() of an 8-bucket table is 8 after a
    six-key dictionary update followed by the string update "xbgbbd".
    """
    initial_counts = {'a': 3, 'b': 2, 'c': 4, 'd': 1, 'e': 5, 'f': 6}

    table = HashTable(buckets=8)
    table.update(initial_counts)
    table.update("xbgbbd")

    self.assertEqual(table.cardinality(), 8)
def test_none_init(self):
    """
    Negative test: constructing a HashTable without any parameters is
    expected to raise TypeError.
    """
    # msg is shown only when no TypeError is raised; it used to name
    # ValueError, contradicting the exception asserted here.
    with self.assertRaises(
            TypeError,
            msg="Constructor should throw TypeError for no parameters"):
        HashTable()
def test_both_init(self):
    """
    Test that a table constructed with both size_mb=16 and buckets=1024
    reports 1024 buckets.
    """
    table = HashTable(size_mb=16, buckets=1024)
    self.assertEqual(table.buckets(), 1024)
def setUp(self):
    """Give every test a fresh 64-bucket table in self.ht."""
    fresh_table = HashTable(buckets=64)
    self.ht = fresh_table
def test_basic_increments(self):
    """
    Tests increment operation: default and explicit increments, on new
    keys and repeated on the same key.
    """
    table = HashTable(buckets=64)

    # New key, default increment.
    table.increment('New element by default')
    self.assertEqual(table['New element by default'], 1)

    # New key, explicit increment.
    table.increment('New element by 3', 3)
    self.assertEqual(table['New element by 3'], 3)

    # Same key incremented twice by the default.
    for _ in range(2):
        table.increment('2 repeated increments')
    self.assertEqual(table['2 repeated increments'], 2)

    # Same key incremented three times by 4.
    for _ in range(3):
        table.increment('3 repeated increments by 4', 4)
    self.assertEqual(table['3 repeated increments by 4'], 12)
def test_increment_by_long_long_max(self):
    """
    Checks that a new key can be incremented by exactly long_long_max
    and then reads back that value.
    """
    table = HashTable(buckets=64)
    key = 'max'
    table.increment(key, long_long_max)
    self.assertEqual(table[key], long_long_max)
class HashTableItemsTest(unittest.TestCase):
    """
    Functional tests for determining size (cardinality) of hashtable and iterations.
    """

    def setUp(self):
        self.ht = HashTable(buckets=64)

    def test_simple_length_test(self):
        """len() counts distinct keys as strings are added."""
        table = self.ht
        self.assertEqual(len(table), 0)

        table.update("boss")
        self.assertEqual(len(table), 3)

        table.update("sad")
        self.assertEqual(len(table), 5)

    def test_simple_cardinality_test(self):
        """cardinality() counts distinct keys as strings are added."""
        table = self.ht
        self.assertEqual(table.cardinality(), 0)

        table.update("boss")
        self.assertEqual(table.cardinality(), 3)

        table.update("sad")
        self.assertEqual(table.cardinality(), 5)

    def test_delete_length_test(self):
        """Deleting a key lowers len() once; deleting it again changes nothing."""
        table = self.ht
        table.update("boss")
        self.assertEqual(len(table), 3)

        del table['s']
        self.assertEqual(len(table), 2)

        del table['s']
        self.assertEqual(len(table), 2)

    def test_delete_prune_length_iteration_test(self):
        """Checks len(), items() and cardinality() across a delete followed by a prune."""
        self.ht = HashTable(buckets=8)
        table = self.ht

        table.update("bbboss")
        self.assertEqual(len(table), 3)
        del table['s']
        self.assertEqual(len(table), 2)

        table.update("122333")
        self.assertEqual(len(table), 5)
        # Iteration should yield exactly these 5 elements.
        before_overflow = {'b': 3, 'o': 1, '1': 1, '2': 2, '3': 3}
        self.assertEqual(set(table.items()), set(before_overflow.items()))

        # The next add will overflow because the deleted bucket is still
        # physically there, so the 'real' size is 5.
        table.update("!")
        # The overflow removes '1', 'o', and the empty bucket.
        self.assertEqual(len(table), 4)
        # Iteration should now yield exactly these 4 elements.
        after_overflow = {'b': 3, '!': 1, '2': 2, '3': 3}
        self.assertEqual(set(table.items()), set(after_overflow.items()))
        self.assertEqual(table.cardinality(), 7)