def test_hash_map(self):
    """Exercise put/get/remove/contains/size on a HashMap with mixed key types.

    First batch (0..999): even ``i`` is stored under the 1-tuple ``(i,)``
    with value ``i``; odd ``i`` is stored under ``str(i)`` with value
    ``[i]``.  The second batch (1000..9999) swaps the parity so that odd
    numbers get tuple keys and even numbers get string keys.
    """
    test_map = HashMap()

    # Initialize the map with 1000 key/value pairs.
    for i in range(0, 1000):
        if i % 2 == 0:
            test_map.put((i,), i)
        else:  # i % 2 == 1
            test_map.put(str(i), [i])
    self.assertEqual(test_map.size(), 1000)

    for i in range(0, 1000):
        if i % 2 == 0:
            self.assertEqual(test_map.get((i,)), i)
        else:  # i % 2 == 1
            self.assertEqual(test_map.get(str(i)), [i])

    # 8 is even, so it was stored as (8,), never as the string "8".
    with self.assertRaises(KeyNotFound):
        test_map.get("8")
    # 991 is odd, so it was stored as "991", never as the tuple (991,).
    with self.assertRaises(KeyNotFound):
        test_map.get((991,))
    # Keys that were never inserted at all.
    with self.assertRaises(KeyNotFound):
        test_map.get("test_key")
    with self.assertRaises(KeyNotFound):
        test_map.get((1002,))

    # Remove 700 elements (100..799), leaving 300.
    for i in range(100, 800):
        if i % 2 == 0:
            test_map.remove((i,))
        else:  # i % 2 == 1
            test_map.remove(str(i))
    self.assertEqual(test_map.size(), 300)
    self.assertTrue(test_map.contains((80,)))    # below the removed range
    self.assertFalse(test_map.contains((120,)))  # inside the removed range

    # Insert 9000 more values, with the key types swapped by parity.
    for i in range(1000, 10000):
        if i % 2 == 1:
            test_map.put((i,), i)
        else:  # i % 2 == 0
            test_map.put(str(i), [i])
    self.assertEqual(test_map.size(), 9300)

    for i in range(1000, 10000):
        if i % 2 == 1:
            self.assertEqual(test_map.get((i,)), i)
        else:  # i % 2 == 0
            self.assertEqual(test_map.get(str(i)), [i])
def test_hash_map(self):
    """Check HashMap put/get/remove/contains/size with tuple and string keys.

    For 0..999, even numbers use the 1-tuple key ``(i, )`` with value ``i``
    and odd numbers use the key ``str(i)`` with value ``[i]``.  For
    1000..9999 the roles are reversed: odd numbers get tuple keys and even
    numbers get string keys.
    """
    test_map = HashMap()

    # Initialize the map with 1000 key/value pairs.
    for i in range(0, 1000):
        if i % 2 == 0:
            test_map.put((i, ), i)
        else:  # i % 2 == 1
            test_map.put(str(i), [i])
    self.assertEqual(test_map.size(), 1000)

    for i in range(0, 1000):
        if i % 2 == 0:
            self.assertEqual(test_map.get((i, )), i)
        else:  # i % 2 == 1
            self.assertEqual(test_map.get(str(i)), [i])

    # Lookups with the "wrong" key type for that parity must fail:
    # 8 was stored as (8, ) and 991 was stored as "991".
    with self.assertRaises(KeyNotFound):
        test_map.get("8")
    with self.assertRaises(KeyNotFound):
        test_map.get((991, ))
    # Keys that were never inserted must fail as well.
    with self.assertRaises(KeyNotFound):
        test_map.get("test_key")
    with self.assertRaises(KeyNotFound):
        test_map.get((1002, ))

    # Remove 700 elements (100..799), leaving 300.
    for i in range(100, 800):
        if i % 2 == 0:
            test_map.remove((i, ))
        else:  # i % 2 == 1
            test_map.remove(str(i))
    self.assertEqual(test_map.size(), 300)
    self.assertTrue(test_map.contains((80, )))    # untouched by the removal
    self.assertFalse(test_map.contains((120, )))  # removed above

    # Insert 9000 more values, with the key types swapped by parity.
    for i in range(1000, 10000):
        if i % 2 == 1:
            test_map.put((i, ), i)
        else:  # i % 2 == 0
            test_map.put(str(i), [i])
    self.assertEqual(test_map.size(), 9300)

    for i in range(1000, 10000):
        if i % 2 == 1:
            self.assertEqual(test_map.get((i, )), i)
        else:  # i % 2 == 0
            self.assertEqual(test_map.get(str(i)), [i])
def test_remove_1(self):
    """
    Run Example #1 from the guidelines against remove().

    Prints the result of get('key1') before the key is inserted, after it
    is inserted, and after it is removed, then removes a key ('key4') that
    was never inserted.  Nothing is asserted; the output is inspected by eye.

    :passed: yes
    """
    print("--- EXAMPLE 1 ---")
    table = HashMap(50, hash_function_1)

    before_insert = table.get('key1')
    print(before_insert)

    table.put('key1', 10)
    after_insert = table.get('key1')
    print(after_insert)

    table.remove('key1')
    after_removal = table.get('key1')
    print(after_removal)

    # Removing a key that was never inserted.
    table.remove('key4')
def test_remove_3(self):
    """
    Call remove() on a HashMap that has had nothing inserted.

    The map is built with capacity 6 and hash_function_1; whatever
    remove("cat") returns is printed rather than asserted.

    :passed: yes
    """
    empty_map = HashMap(6, hash_function_1)
    outcome = empty_map.remove("cat")
    print(outcome)
def test_remove_2(self):
    """
    Call remove() on a HashMap constructed with capacity 0.

    Whatever remove("key1") returns is printed rather than asserted.

    :passed: yes
    """
    zero_capacity_map = HashMap(0, hash_function_1)
    outcome = zero_capacity_map.remove("key1")
    print(outcome)
def test_contains_key_5(self):
    """
    Run Example #1 from the guidelines against contains_key().

    Prints contains_key() for 'key1' on the fresh map, then for a mix of
    inserted and never-inserted keys after three puts, and finally for
    'key3' once it has been removed.  Nothing is asserted.

    :passed: yes
    """
    print("--- EXAMPLE 1 ---")
    table = HashMap(50, hash_function_1)

    # Nothing has been inserted yet.
    print(table.contains_key('key1'))

    for name, number in (('key1', 10), ('key2', 20), ('key3', 30)):
        table.put(name, number)

    # 'key4' is the only key in this probe sequence that was never inserted.
    for name in ('key1', 'key4', 'key2', 'key3'):
        print(table.contains_key(name))

    table.remove('key3')
    print(table.contains_key('key3'))
def test_remove(self):
    """Tests the HashMap remove method with both hash functions.

    Two maps of capacity 10 (one per hash function) are filled with the same
    key/value pairs.  Pairs are then removed one at a time from both maps and
    from a control list, and after each removal the contents reported by
    get_keys_from_map() are compared with the control list through
    check_lists_are_equals().  Finally, removing an absent key must leave the
    ``size`` attribute of both maps unchanged.
    """
    expected_pairs = [("test_5", 5), ("test_-5", -5), ("test_5_", 12),
                      ("diff_word", 15), ("another_word", 20), ("set", 10),
                      ("tes", 8), ("anotha_one", -7),
                      ("completely_different", 13), ("getting_there", -1)]
    map_hf1 = HashMap(10, hash_function_1)
    map_hf2 = HashMap(10, hash_function_2)

    # Both maps start out empty.
    found_hf1 = get_keys_from_map(map_hf1)
    found_hf2 = get_keys_from_map(map_hf2)
    self.assertEqual(len(found_hf1), len(found_hf2))
    self.assertEqual(0, len(found_hf1))

    # Load every test pair into both maps.
    for key, val in expected_pairs:
        map_hf1.put(key, val)
        map_hf2.put(key, val)

    # Everything in the control list must now be reported by both maps.
    found_hf1 = get_keys_from_map(map_hf1)
    found_hf2 = get_keys_from_map(map_hf2)
    self.assertTrue(check_lists_are_equals(expected_pairs, found_hf1))
    self.assertTrue(check_lists_are_equals(expected_pairs, found_hf2))

    # Removal scenarios, in order.  The bucket positions are the original
    # author's notes about where each key lands; they are not checked here.
    removal_cases = [
        ("test_5", 5),                 # test1 - size 1 linked list bucket node (bucket 6)
        ("completely_different", 13),  # test2 - size >1 bucket, HEAD POSITION (bucket 2)
        ("another_word", 20),          # test3 - size >1 bucket, TAIL POSITION (bucket 2)
        ("tes", 8),                    # test4 - size >1 bucket, MID POSITION (bucket 2)
        ("anotha_one", -7),            # test5 - size >1 linked list bucket node
        ("set", 10),                   # test6 - size >1 linked list bucket node
    ]
    for pair in removal_cases:
        # Drop the pair from the control list, then from both maps.
        expected_pairs.remove(pair)
        map_hf1.remove(pair[0])
        map_hf2.remove(pair[0])

        # Re-read the maps and compare them with the control list.
        found_hf1 = get_keys_from_map(map_hf1)
        found_hf2 = get_keys_from_map(map_hf2)
        self.assertTrue(check_lists_are_equals(expected_pairs, found_hf1))
        self.assertTrue(check_lists_are_equals(expected_pairs, found_hf2))

    # test7 - remove value not in map (should do nothing)
    size_before_hf1 = map_hf1.size
    size_before_hf2 = map_hf2.size
    map_hf1.remove("key_not_in_list")
    map_hf2.remove("key_not_in_list")
    self.assertEqual(size_before_hf1, map_hf1.size)
    self.assertEqual(size_before_hf2, map_hf2.size)
def top_words(source, number):
    """
    Takes a plain text file and counts the number of occurrences of case
    insensitive words. Returns the top `number` of words in a list of tuples
    of the form (word, count).

    Words are filed in the hash table under their current count (the original
    author's note: buckets are assigned according to count, not word), so the
    most frequent words are collected by walking the counts from the highest
    one seen down to 1.  Words with the same count come out in whatever order
    their bucket yields them.

    Args:
        source: the file name containing the text
        number: the number of top results to return (e.g. 5 would return the
            5 most common words)

    Returns:
        A list of tuples of the form (word, count), sorted by most common
        word. (e.g. [("a", 23), ("the", 20), ("it", 10)])  The list is
        shorter than `number` when the file has fewer distinct words.
    """
    # Running number of occurrences per lower-cased word.  A dict lookup
    # replaces re-counting a list of every word seen so far, which made the
    # scan quadratic in the length of the text.
    counts = {}
    max_count = 1

    ht = HashMap(2500, hash_function_2)

    # This block of code will read a file one word as a time and
    # put the word in `w`. It should be left as starter code.
    with open(source) as f:
        for line in f:
            words = rgx.findall(line)
            for w in words:
                w = w.lower()
                count = counts.get(w, 0)
                if count:
                    # Seen before: remove the word from the hash table ...
                    ht.remove(w, count)
                    # ... and put the link in the new bucket (buckets are
                    # assigned according to count, not word).
                    ht.put(w, count + 1, True)
                    if count + 1 > max_count:
                        max_count = count + 1
                else:
                    # First occurrence of this word.
                    ht.put(w, 1, True)
                counts[w] = count + 1

    # Get the top used words by grabbing them from the buckets from the
    # highest count down.  The range must go all the way down to 1;
    # stopping at 2 silently dropped every word that occurs exactly once.
    results = []
    for i in range(max_count, 0, -1):
        bucket = ht.get_bucket_by_key(i)
        if bucket is None or bucket.is_empty():
            continue
        for link in bucket:
            # A bucket may hold links whose value differs from `i`; only
            # links with exactly this count are taken on this pass.
            if link.value == i:
                results.append((link.key, link.value))
                if len(results) == number:
                    return results
    return results


# print(top_words("alice.txt",10))  # COMMENT THIS OUT WHEN SUBMITTING TO GRADESCOPE
#! /usr/bin/python
'''
Benchmark for HashMap: repeated add / get / remove on small maps.

Recorded timings from the original run:

real 0m26.026s
user 0m10.089s
sys 0m2.800s
3.411 times slower than standard library dict on this test
'''
from random import choice

from hash_map import HashMap

# Character codes from which the random 3-character keys are drawn.
c_range = range(40, 121)


def rc():
    """Return one random character whose code is drawn from c_range."""
    return chr(choice(c_range))


for i in range(5000):
    d = HashMap()  # table_size = 100
    for j in range(70):
        s = rc() + rc() + rc()
        d.add((s, s.upper()))
    # Snapshot the keys first: the loop body removes and re-adds entries,
    # and mutating a container while iterating over it directly can skip or
    # revisit entries.
    for k in list(d):
        v = d.get(k)
        # Parenthesised single-argument form prints the same on Python 2 and
        # is valid syntax on Python 3.
        print(v)
        d.remove(k)
        d.add((k, v))
m.put('str' + str(i // 3), i * 100) if i % 10 == 9: print(m.empty_buckets(), m.table_load(), m.size, m.capacity) """ CONTAINS KEY """ print("\n\n******** CONTAINS_KEY() ********") print("--- EXAMPLE 1 ---") m = HashMap(50, hash_function_1) print(m.contains_key('key1')) m.put('key1', 10) m.put('key2', 20) m.put('key3', 30) print(m.contains_key('key1')) print(m.contains_key('key4')) print(m.contains_key('key2')) print(m.contains_key('key3')) m.remove('key3') print(m.contains_key('key3')) print("--- EXAMPLE 2 ---") m = HashMap(75, hash_function_2) keys = [i for i in range(1, 1000, 20)] for key in keys: m.put(str(key), key * 42) print(m.size, m.capacity) result = True for key in keys: # all inserted keys must be present result = result and m.contains_key(str(key)) # all NOT inserted keys must be absent result = result and not m.contains_key(str(key + 1)) print(result)