def test_index(self):
    """Check that ``_index`` reaches every slot and spreads keys fairly evenly."""
    slot_count = 7
    budget = 10 ** 3
    table = HashTable(slot_count)
    hits = [0] * slot_count
    draws = 0

    def every_slot_hit():
        return 0 not in hits

    # Phase 1: draw random keys until each slot has been hit (or the budget ends).
    while draws < budget:
        if every_slot_hit():
            break
        slot = table._index(self.rand_string())
        self.assertTrue(0 <= slot < slot_count)
        hits[slot] += 1
        draws += 1
    self.assertTrue(every_slot_hit())

    # Phase 2: spend the rest of the budget to measure how even the usage is.
    for _ in range(draws, budget):
        hits[table._index(self.rand_string())] += 1

    try:
        # The busiest slot must stay below 4x the quietest one.
        self.assertTrue(max(hits) < 4 * min(hits))
    except Exception:
        # Show the distribution before propagating the failure.
        print(hits)
        raise
def test_set_get_latest(self):
    """Setting the same key twice must leave the most recent value retrievable."""
    table = HashTable(4)
    for value in (0, 1):
        table.set("A", value)
    self.assertEqual(table.get("A"), 1)
def words_hashtable():
    """Return a 1000-slot table mapping each stripped line of the system word list to itself."""
    table = HashTable(1000)
    with open('/usr/share/dict/words', 'r') as word_file:
        for raw_line in word_file:
            word = raw_line.strip()
            table.set(word, word)
    return table
def test_set():
    """set() must append (key, value) tuples to the expected buckets, in insertion order."""
    table = HashTable(size=1024)
    for word in ('foo', 'spoofs', 'utopia'):
        table.set(word, word)

    # (bucket index, position inside the bucket, stored word)
    expected_layout = [(91, 0, 'foo'), (91, 1, 'spoofs'), (885, 0, 'utopia')]
    for bucket, position, word in expected_layout:
        assert table.hashtable[bucket][position] == (word, word)
def test_get():
    """get() must find entries that were placed directly into the bucket lists."""
    table = HashTable(size=1024)
    # Populate the buckets by hand so that only get() is exercised.
    for bucket, word in ((91, 'foo'), (91, 'spoofs'), (885, 'utopia')):
        table.hashtable[bucket].append((word, word))

    for word in ('foo', 'spoofs', 'utopia'):
        assert table.get(word) == word
def test_set_and_get_word_list(word_list):
    """Every word stored as its own value must be returned unchanged by get()."""
    table = HashTable()
    for entry in word_list:
        table.set(entry, entry)
    for entry in word_list:
        assert entry == table.get(entry)
def test_get_complete():
    """Load every line of the ``words`` file into the table, then read each one back."""
    table = HashTable(1024)
    loaded = []
    with open('words') as source:
        for raw_line in source:
            entry = raw_line.rstrip()
            loaded.append(entry)
            table.set(entry, entry)

    for entry in loaded:
        assert table.get(entry) == entry
def test_put():
    """put() must accept both integer and string keys without raising."""
    table = HashTable()
    entries = [
        (9, 100),          # integer keys
        (20, 110),
        ('cat', 125),      # string keys
        ('meow', 'blah!'),
    ]
    for key, value in entries:
        table.put(key, value)
def test_set_get_manywords(self):
    """Store every line of the system word list with value 0 and read each one back.

    The lines are read into a list first. The earlier version iterated the
    same file handle twice, so the lookup loop saw an exhausted file and the
    final assertion could never fail; it also left the file open.
    """
    table = HashTable(10000)
    with open('/usr/share/dict/words') as word_file:
        lines = list(word_file)

    for line in lines:
        table.set(line, 0)

    # Every key was stored with 0, so the retrieved values must add up to 0.
    total = 0
    for line in lines:
        total += table.get(line)
    self.assertEqual(total, 0)
def test_hash(self):
    """hash() must reject non-string keys and map string keys into [0, upper - 1]."""
    upper = 16
    h1 = HashTable(upper)

    # One assertRaises per instance: a single context manager around the whole
    # loop exits on the first TypeError and never checks the remaining ones.
    for inst in MyFuncTestCase.non_string_instances:
        with self.assertRaises(TypeError):
            h1.hash(inst)

    # NOTE(review): the table is built with `upper`, not `hs`, so every pass
    # checks the same size -- confirm whether HashTable(hs) was intended.
    for hs in MyFuncTestCase.hash_size_instances:
        h1 = HashTable(upper)
        for k in MyFuncTestCase.key_instances:
            v = h1.hash(k)
            self.assertGreaterEqual(v, 0)
            self.assertLessEqual(v, int(upper) - 1)
def test_hash():
    """Test properties of the hash function over the system word list.

    Takes a long time. Checks that non-string keys raise TypeError, that every
    hash value falls inside the table, and that the distribution of words over
    slots is reasonable.
    """
    # There are 235886 words in my dictionary.
    # 1.6 * the expected size is 377418, ideal for performance
    ideal_size = 377418
    table = HashTable(ideal_size)

    with pytest.raises(TypeError) as err:
        table.hash(12345)
    # Checked after the block (statements following the raising call inside it
    # never run) and against the message text rather than the exception object.
    assert str(err.value) == "Key must be a string"

    frequencies = dict.fromkeys(range(ideal_size), 0)
    with io.open('/usr/share/dict/words') as words:
        # Iterate the file directly so the empty string returned at EOF is not
        # hashed as if it were a word.
        for line in words:
            hashval = table.hash(line.strip())
            assert 0 <= hashval < ideal_size
            frequencies[hashval] += 1

    counts = list(frequencies.values())
    emptyslots = sum(1 for count in counts if count == 0)
    max_bucket_size = max(counts)
    print("%d %d" % (emptyslots, max_bucket_size))

    # Should be roughly as many empty slots as filled slots.
    # This also tests our "ideal_size" choice.
    assert emptyslots > (ideal_size // 3)
    # The lower bound used to be asserted twice; the second check is now the
    # matching upper bound.
    assert emptyslots < (2 * ideal_size // 3)
    # And the maximum bucket size shouldn't be too big
    assert max_bucket_size < 10
def test_get_set_del(self):
    """Item get/delete/set must be forwarded to the bucket selected by ``_index``."""
    table = HashTable(2)
    table._table = [GetSetDelSpy() for _ in range(2)]
    # Keys 31 and 62 map to buckets 0 and 1 respectively.
    table._index = lambda x: int(x / 31) - 1

    def expect_actions(bucket_zero, bucket_one):
        self.assertEqual(table._table[0].actions, bucket_zero)
        self.assertEqual(table._table[1].actions, bucket_one)

    _ = table[31]
    expect_actions([('g', 31)], [])

    _ = table[62]
    expect_actions([('g', 31)], [('g', 62)])

    del table[31]
    expect_actions([('g', 31), ('d', 31)], [('g', 62)])

    table[62] = 4
    expect_actions([('g', 31), ('d', 31)], [('g', 62), ('s', 62, 4)])
def test_set_get_collisions(self):
    """Two keys stored in a one-slot table must both remain retrievable."""
    # A table of size 1 is the only way to force a collision without relying
    # on details of the hash function.
    table = HashTable(1)
    entries = [("test", 1776), ("table", 1783)]
    for key, value in entries:
        table.set(key, value)

    fetched = [table.get(key) for key, _ in entries]
    self.assertEqual(fetched[1], 1783)
    self.assertEqual(fetched[0], 1776)
def test_set_get_simple(self):
    """A value stored under a key must be returned by get(), for every table size."""
    keys = MyFuncTestCase.key_instances
    for table_size in MyFuncTestCase.hash_size_instances:
        table = HashTable(table_size)

        table.set("XYTZ", 1776)
        self.assertEqual(table.get("XYTZ"), 1776)

        # Each key reads back immediately after being stored ...
        for key in keys:
            table.set(key, len(key))
            self.assertEqual(table.get(key), len(key))

        # ... and again once all keys are in the table.
        for key in keys:
            self.assertEqual(table.get(key), len(key))
def test_set_get(self):
    """set() must reject non-string keys; values of any type must round-trip."""
    for hs in MyFuncTestCase.hash_size_instances:
        h1 = HashTable(hs)

        # Check different key types. One assertRaises per instance: a single
        # context manager around the loop stops at the first TypeError and
        # leaves the remaining instances untested.
        for inst in MyFuncTestCase.non_string_instances:
            with self.assertRaises(TypeError):
                h1.set(inst, inst)

        # Store variously typed values under string keys and check each value
        # immediately after it is set.
        for inst in MyFuncTestCase.non_string_instances:
            h1.set(str(inst), inst)
            v = h1.get(str(inst))
            self.assertEqual(v, inst)

        # Then, after all the values have been set, try to get them again.
        for inst in MyFuncTestCase.non_string_instances:
            v = h1.get(str(inst))
            self.assertEqual(v, inst)
def test_size():
    """size() must follow the number of entries across puts and removes."""
    table = HashTable()
    # Two integer keys followed by two string keys.
    for key, value in ((9, 100), (20, 110), ('cat', 125), ('meow', 'blah!')):
        table.put(key, value)
    assert table.size() == 4

    for key in (9, 'cat'):
        table.remove(key)
    assert table.size() == 2
def test_hash():
    """hash() must return the known slot numbers for sample keys in a 16-slot table."""
    table = HashTable(16)
    for key, expected_slot in (('z', 10), ('zzz', 14)):
        assert table.hash(key) == expected_slot
def hash_table():
    """Return a capacity-100 table holding one str, one float and one bool key."""
    table = HashTable(capacity=100)
    samples = (("hola", "hello"), (98.6, 37), (False, True))
    for key, value in samples:
        table[key] = value
    return table
def test_should_compare_unequal(hash_table):
    """A table with different contents must compare unequal to the fixture table."""
    assert hash_table != HashTable.from_dict({"different": "value"})
def test_should_get_keys_of_empty_hash_table():
    """``keys`` of a freshly created table must be an empty set."""
    empty_table = HashTable(capacity=100)
    assert empty_table.keys == set()
def test_should_get_values_of_empty_hash_table():
    """``values`` of a freshly created table must be an empty list."""
    empty_table = HashTable(capacity=100)
    assert empty_table.values == []
def test_should_report_length_of_empty_hash_table():
    """len() of a freshly created table must be zero."""
    empty_table = HashTable(capacity=100)
    assert len(empty_table) == 0
def test_should_create_hashtable():
    """Constructing a table with an explicit capacity must yield an object."""
    created = HashTable(capacity=100)
    assert created is not None
def test_should_not_contain_none_value_when_created():
    """A new table must not report None among its values."""
    initial_values = HashTable(capacity=100).values
    assert None not in initial_values
def test_should_insert_none_value():
    """None must be storable as a value and show up in ``pairs``."""
    table = HashTable(capacity=100)
    table["key"] = None
    stored_pairs = table.pairs
    assert ("key", None) in stored_pairs
def test_should_create_empty_pair_slots():
    """A capacity-3 table must start with three None slots."""
    table = HashTable(capacity=3)
    assert table._slots == [None] * 3
def test_should_report_capacity_of_empty_hash_table():
    """``capacity`` must echo the value passed to the constructor."""
    table = HashTable(capacity=100)
    assert table.capacity == 100
def test_should_compare_equal_different_capacity():
    """Tables with the same pairs must compare equal regardless of capacity."""
    source = {"a": 1, "b": 2, "c": 3}
    smaller = HashTable.from_dict(source, capacity=50)
    larger = HashTable.from_dict(source, capacity=100)
    assert smaller == larger
def test_hash_table_removes_correctly(self):
    """Entries must be readable after put() and read back as None after delete()."""
    table = HashTable(0x10000)
    entries = [("key-0", "val-0"), ("key-1", "val-1"), ("key-2", "val-2")]

    for key, value in entries:
        table.put(key, value)
    for key, value in entries:
        self.assertTrue(table.get(key) == value)

    # Delete in reverse insertion order.
    for key, _ in reversed(entries):
        table.delete(key)
    for key, _ in entries:
        self.assertTrue(table.get(key) is None)
def test_set():
    """set() must place the [key, value] pair first in the expected bucket."""
    table = HashTable(1024)
    table.set('pig', 'pig')
    first_entry = table.buckets[320][0]
    assert first_entry == ['pig', 'pig']
def test_should_not_create_hashtable_with_zero_capacity():
    """A capacity of zero must be rejected with ValueError."""
    zero_capacity = dict(capacity=0)
    with pytest.raises(ValueError):
        HashTable(**zero_capacity)
def test_duplicate_keys():
    """Storing a key a second time must replace its value."""
    table = HashTable(1024)
    for value in (1, 2):
        table.set('foo', value)
    assert table.get('foo') == 2
def test_resize(self):
    """Track size, bucket count and load factor as inserts trigger resizing."""
    table = HashTable(2)  # initial size of 2

    def expect_state(size, bucket_count, load):
        assert table.size == size
        assert len(table.buckets) == bucket_count
        assert table.load_factor() == load

    expect_state(0, 2, 0)

    table.set('I', 1)
    expect_state(1, 2, 0.5)

    table.set('V', 5)  # should trigger a resize
    expect_state(2, 4, 0.5)

    table.set('X', 10)
    expect_state(3, 4, 0.75)

    table.set('L', 50)  # should trigger a resize
    expect_state(4, 8, 0.5)
def test_should_create_hashtable_with_default_capacity():
    """A table built without arguments must report a capacity of 8."""
    default_table = HashTable()
    assert default_table.capacity == 8
def test_items(self):
    """items() must list every (key, value) pair stored so far."""
    table = HashTable()
    assert table.items() == []

    table.set('I', 1)
    assert table.items() == [('I', 1)]

    # From two entries on, the order is unspecified, so compare as multisets.
    expected = [('I', 1)]
    for key, value in (('V', 5), ('X', 10)):
        table.set(key, value)
        expected.append((key, value))
        self.assertCountEqual(table.items(), expected)
def gimme_a_hash_table():
    """Return a default-sized table mapping each entry of ``words`` to itself."""
    table = HashTable()
    for entry in words:
        table.set(entry, entry)
    return table
def test_should_return_duplicate_values():
    """``values`` must keep duplicates when two keys share a value."""
    table = HashTable(capacity=100)
    for name, age in (("Alice", 24), ("Bob", 42), ("Joe", 42)):
        table[name] = age
    assert [24, 42, 42] == sorted(table.values)
def test_should_not_create_hashtable_with_negative_capacity():
    """A negative capacity must be rejected with ValueError."""
    negative_capacity = dict(capacity=-100)
    with pytest.raises(ValueError):
        HashTable(**negative_capacity)
def test_should_raise_error_on_missing_key():
    """Looking up an absent key must raise KeyError carrying that key."""
    table = HashTable(capacity=100)
    with pytest.raises(KeyError) as exception_info:
        table["missing_key"]
    raised = exception_info.value
    assert raised.args[0] == "missing_key"
def test_should_compare_equal_different_key_value_order(hash_table):
    """Tables with the same pairs must compare equal regardless of insertion order."""
    # The ``hash_table`` fixture is requested but not used by this test.
    in_order = HashTable.from_dict({"a": 1, "b": 2, "c": 3})
    shuffled = HashTable.from_dict({"b": 2, "a": 1, "c": 3})
    assert in_order == shuffled
def test_set_invalid_type():
    """set() with an integer key must raise TypeError."""
    table = HashTable(1024)
    integer_key = 1
    with pytest.raises(TypeError):
        table.set(integer_key, 'pig')
def test_length(self):
    """length() must grow by one for each new key."""
    table = HashTable()
    assert table.length() == 0
    entries = [('I', 1), ('V', 5), ('X', 10)]
    for expected_length, (key, value) in enumerate(entries, 1):
        table.set(key, value)
        assert table.length() == expected_length
def test_get_missing_key():
    """get() for a key that was never stored must raise KeyError."""
    table = HashTable(1024)
    for animal in ('pig', 'sasquatch'):
        table.set(animal, animal)
    with pytest.raises(KeyError):
        table.get('dog')
def test_set_and_get(self):
    """Stored values must read back, and a missing key must raise KeyError."""
    table = HashTable()
    entries = [('I', 1), ('V', 5), ('X', 10)]
    for key, value in entries:
        table.set(key, value)
    for key, value in entries:
        assert table.get(key) == value
    assert table.length() == 3
    with self.assertRaises(KeyError):
        table.get('A')  # key was never stored
def test_get_simple():
    """Values stored with set() must be returned by get()."""
    table = HashTable(1024)
    animals = ('pig', 'sasquatch')
    for animal in animals:
        table.set(animal, animal)
    for animal in animals:
        assert table.get(animal) == animal
def test_set_twice_and_get(self):
    """Updating existing keys must change their values without changing length()."""
    table = HashTable()
    for key, value in (('I', 1), ('V', 4), ('X', 9)):
        table.set(key, value)
    assert table.length() == 3

    # Overwrite two of the three keys.
    for key, value in (('V', 5), ('X', 10)):
        table.set(key, value)

    for key, value in (('I', 1), ('V', 5), ('X', 10)):
        assert table.get(key) == value
    assert table.length() == 3  # updates must not be counted as new entries
def _day_key(stamp):
    """Return the hash-table key for the day named by the leading ``YYYY?MM?DD`` text of *stamp*."""
    text = str(stamp)
    return hash(text[:4] + text[5:7] + text[8:10])


def formatDataForPlot(dateInterval, sortedData, span):
    """Expand each series into one value per day and compute simple statistics.

    Args:
        dateInterval: pair ``(start, end)``; days from ``start`` up to but not
            including ``end`` are produced, stepping one day at a time.
        sortedData: ``sortedData[1]`` is the number of series and
            ``sortedData[0][j]`` the rows of series ``j``. Each row supplies
            the value at index 1 and the date text at index 3; the series
            name is taken from ``sortedData[0][j][0][0]``.
        span: number of months, used only to size the per-series hash table.

    Returns:
        ``[data, names, stats]`` where ``data[j]`` is the list of daily values
        (0.0 for days without a row), ``names[j]`` the series name, and
        ``stats[j]`` is ``[max, min, average]`` with the average formatted
        as a string with two decimals.
    """
    series_count = sortedData[1]
    one_day = datetime.timedelta(days=1)

    data = [None] * series_count
    names = [None] * series_count
    stats = [None] * series_count

    for j in range(series_count):
        # Floor division keeps the table size an integer on Python 3 as well
        # (plain "/" would produce a float there).
        HT = HashTable(int(span) * 365 // 12)

        # Seed every day of the interval with 0.0 ...
        tempdate = dateInterval[0]
        while tempdate < dateInterval[1]:
            HT.put(_day_key(tempdate), 0.0)
            tempdate = tempdate + one_day

        # ... then overwrite the days that have a row in the result set.
        for row in sortedData[0][j]:
            value = float(row[1])
            HT.put(_day_key(row[3]), value)

        # Collect the per-day values while tracking max, min and the total.
        xdata = []
        tempMax = -1000000000000
        tempMin = 1000000000000
        total = 0
        tempdate = dateInterval[0]
        while tempdate < dateInterval[1]:
            day_value = HT.get(_day_key(tempdate))
            if day_value > tempMax:
                tempMax = day_value
            if day_value < tempMin:
                tempMin = day_value
            total += day_value
            xdata.append(day_value)
            tempdate = tempdate + one_day

        # An empty interval yields no days; report 0.00 instead of dividing by zero.
        count = len(xdata)
        tempAVR = float(total / float(count)) if count else 0.0
        AVR = "%.2f" % tempAVR

        data[j] = xdata
        names[j] = sortedData[0][j][0][0]
        stats[j] = [tempMax, tempMin, AVR]

    return [data, names, stats]
def test_init(self):
    """A new table must have the requested number of buckets and no entries."""
    requested_buckets = 4
    table = HashTable(requested_buckets)
    assert len(table.buckets) == 4
    assert table.length() == 0
def insertaElementoTest(self):
    """Insert ``self.clave`` -> ``self.valor`` and check that get() returns the value."""
    h = HashTable()
    h.put(self.clave, self.valor)
    # assertEqual replaces the deprecated alias assertEquals, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(self.valor, h.get(self.clave))
def test_delete(self):
    """delete() must remove entries and raise KeyError for keys that are not present."""
    table = HashTable()
    for key, value in (('I', 1), ('V', 5), ('X', 10)):
        table.set(key, value)
    assert table.length() == 3

    for key in ('I', 'X'):
        table.delete(key)
    assert table.length() == 1

    # 'X' was already removed; 'A' was never stored.
    for absent_key in ('X', 'A'):
        with self.assertRaises(KeyError):
            table.delete(absent_key)
def test_remove():
    """remove() must drop only the requested keys; removed keys read back as None."""
    table = HashTable()
    # Two integer keys followed by two string keys.
    for key, value in ((9, 100), (20, 110), ('cat', 125), ('meow', 'blah!')):
        table.put(key, value)

    removed_keys = (9, 'cat')
    for key in removed_keys:
        table.remove(key)

    for key in removed_keys:
        assert table.get(key) is None
    assert table.get(20) == 110
    assert table.get('meow') == 'blah!'
def test_bucket_collision(self):
    """Two keys expected to share a bucket must both remain retrievable."""
    table = HashTable(13)
    # Per the original note, both keys land in bucket 6.
    colliding = [('a', 1), ('n', 2)]
    for key, value in colliding:
        table.put(key, value)
    for key, value in colliding:
        self.assertEqual(table.get(key), value)
def smaller_hashtable():
    """Return a 16-slot table holding 'apple' -> 1 and 'tomato' -> 2."""
    table = HashTable(16)
    for key, value in (('apple', 1), ('tomato', 2)):
        table.set(key, value)
    return table
class SetHash(object):
    """Set of elements backed by a HashTable; each element is stored as both key and value."""

    def __init__(self, elements=None):
        """Create an empty set, then add every item of *elements* if it is given."""
        self.hashet = HashTable()  # hash + set = hashet
        self.size = 0  # number of distinct elements stored
        if elements is not None:
            for element in elements:
                self.add(element)

    def __iter__(self):
        """Yield each element of the set."""
        for item in self.all_items():
            yield item

    def all_items(self):
        """Return the keys of the backing table."""
        return self.hashet.keys()

    def contains(self, key):
        """Return whatever the backing table reports for ``contains(key)``."""
        return self.hashet.contains(key)

    def length(self):
        """Return the number of elements in the set."""
        return self.size

    def add(self, element):
        """Add *element* and return the backing table's ``set`` result.

        The size grows only when the element was not already present, so
        adding a duplicate no longer inflates ``length()``.
        """
        is_new = not self.contains(element)
        result = self.hashet.set(element, element)
        if is_new:
            self.size += 1
        return result

    def remove(self, element):
        """Remove *element* and return the backing table's ``delete`` result.

        The size is decremented only after ``delete`` returns, so an error
        raised for an absent element leaves the count untouched.
        """
        result = self.hashet.delete(element)
        self.size -= 1
        return result

    def union(self, other_set):
        """Return a new set that is the union of this and 'other_set', i.e. ALL elements w/ no dupes.

        Neither operand is modified; the earlier version added into
        'other_set' and returned that same object.
        """
        union = SetHash(self)
        for item in other_set:
            union.add(item)
        return union

    def intersection(self, other_set):
        """Return a new set that is the intersection of this and 'other_set', i.e. OVERLAP elements."""
        intersection = SetHash()
        # Walk the smaller set and probe the larger one.
        if other_set.size < self.size:
            smaller_set, larger_set = other_set, self
        else:
            smaller_set, larger_set = self, other_set
        for item in smaller_set:
            if larger_set.contains(item):
                intersection.add(item)
        return intersection

    def difference(self, other_set):
        """Return a new set holding the elements of this set that 'other_set' does NOT have.

        This set is left untouched; the earlier version removed the shared
        elements from ``self`` and returned ``self``.
        """
        difference = SetHash()
        for item in self:
            if not other_set.contains(item):
                difference.add(item)
        return difference

    def is_subset(self, other_set):
        """Return a boolean indicating whether 'other_set' is a subset of this set."""
        if other_set.size > self.size:
            return False
        for item in other_set:
            if not self.contains(item):
                return False
        return True
def test_keys(self):
    """keys() must list every key stored so far."""
    table = HashTable()
    assert table.keys() == []

    table.set('I', 1)
    assert table.keys() == ['I']

    # From two entries on, the order is unspecified, so compare as multisets.
    expected = ['I']
    for key, value in (('V', 5), ('X', 10)):
        table.set(key, value)
        expected.append(key)
        self.assertCountEqual(table.keys(), expected)
def test_hash_table_pution_overwrites_correctly(self):
    """A second put() for an existing key must replace the stored value."""
    table = HashTable(0x10000)
    keys = ["key-0", "key-1", "key-2"]
    first_values = ["val-0", "val-1", "val-2"]
    new_values = ["new-val-0", "new-val-1", "new-val-2"]

    for key, value in zip(keys, first_values):
        table.put(key, value)
    for key, value in zip(keys, new_values):
        table.put(key, value)

    for key, value in zip(keys, new_values):
        self.assertTrue(table.get(key) == value)
def test_set_get():
    """set(key, val) should store the given val using the given key,
    get(key) should return the value stored with the given key.

    Also checks argument validation, a full round trip over the system word
    list, the KeyError for an unknown key, and overwriting an existing key.
    """
    # There are 235886 words in my dictionary.
    # 1.6 * the expected size is 377418, ideal for performance
    ideal_size = 377418
    table = HashTable(ideal_size)

    table.set("hound", "puppy")
    assert table.get("hound") == "puppy"

    with pytest.raises(TypeError) as err:
        table.set(12345, "oogabooga")
    # Checked after the block (statements following the raising call inside it
    # never run) and against the message text rather than the exception object.
    assert str(err.value) == "Key must be a string"

    with pytest.raises(TypeError):
        table.set("oogabooga")  # key & value both required
    with pytest.raises(TypeError):
        table.get()  # key required
    with pytest.raises(TypeError):
        # Presumably rejected for the missing value argument, so no message
        # check is made here -- confirm against HashTable.set.
        table.set(12345)

    # Fill the table, key == value. Iterating the file avoids storing the
    # empty string that readline() returns at EOF.
    with io.open('/usr/share/dict/words') as words:
        for line in words:
            word = line.strip()
            table.set(word, word)

    # Now, we should get the same stuff back. The earlier `while word != ""`
    # loop never ran here because `word` was already "" after the fill loop.
    with io.open('/usr/share/dict/words') as words:
        for line in words:
            word = line.strip()
            assert table.get(word) == word

    with pytest.raises(KeyError):
        table.get("alkejralekjreqr")

    # Should also be able to reset stuff
    assert table.get("bacon") == "bacon"
    table.set("bacon", "delicious")
    assert table.get("bacon") == "delicious"
def test_values(self):
    """values() must list every value stored so far."""
    table = HashTable()
    assert table.values() == []

    table.set('I', 1)
    assert table.values() == [1]

    # From two entries on, the order is unspecified, so compare as multisets.
    expected = [1]
    for key, value in (('V', 5), ('X', 10)):
        table.set(key, value)
        expected.append(value)
        self.assertCountEqual(table.values(), expected)
def test_distinct_buckets(self):
    """The first key must still read back after two more keys are stored."""
    table = HashTable(13)
    for key, value in (('a', 1), ('b', 2), ('c', 3)):
        table.put(key, value)
    self.assertEqual(table.get('a'), 1)
def test_hash_table_removes_correctly(self):
    """Store ten entries in an 8-slot table and delete them in two batches.

    After deleting key-7 down to key-0, those keys must read back as None
    while key-8 and key-9 keep their values; after deleting those two as
    well, they must read back as None too.
    """
    ht = HashTable(8)
    keys = ["key-%d" % n for n in range(10)]
    values = ["val-%d" % n for n in range(10)]

    for key, value in zip(keys, values):
        ht.put(key, value)
    for key, value in zip(keys, values):
        self.assertEqual(ht.get(key), value)

    # First batch: delete key-7 down to key-0.
    for key in reversed(keys[:8]):
        ht.delete(key)
    for key in keys[:8]:
        self.assertIsNone(ht.get(key))

    # The survivors are compared by value. The earlier `is "val-8"` tested
    # object identity against a string literal, which depends on interning
    # and triggers a SyntaxWarning on Python 3.8+.
    for key, value in zip(keys[8:], values[8:]):
        self.assertEqual(ht.get(key), value)

    # Second batch: delete the remaining two entries.
    ht.delete("key-9")
    ht.delete("key-8")
    for key in keys[8:]:
        self.assertIsNone(ht.get(key))