def test_contains_key_2(self):
    """
    Test contains_key with a hash map of 1 bucket.
    :passed: yes
    """
    # Build a three-node linked list of key/value pairs.
    chain = LinkedList()
    for key, value in (("cot", 3), ("box", 2), ("axe", 1)):
        chain.add_front(key, value)

    # Install the chain directly into one bucket of a 7-slot map.
    hash_m = HashMap(7, hash_function_2)
    hash_m._buckets[6] = chain

    # Every stored key must be found.
    for present in ("axe", "box", "cot"):
        self.assertTrue(hash_m.contains_key(present))

    # Case variants and unrelated words must not be found.
    for absent in ("Axe", "aXe", "axE", "AXE", "boxx", "cat", "verb"):
        self.assertFalse(hash_m.contains_key(absent))
def test_all():
    """Insert EL_COUNT string keys and verify each one maps back to itself."""
    hash_map = HashMap()
    element_total = EL_COUNT
    for index in range(element_total):
        hash_map.put(str(index), str(index))
    for index in range(element_total):
        assert hash_map.get(str(index)) == str(index)
class TestHashMap(unittest.TestCase):
    """Exercises HashMap bucket resizing and insertion behaviour."""

    def setUp(self):
        # Fresh map and a sample item before every test.
        self.hash_map = HashMap()
        self.item = Item('key', 'value')

    def test_resize(self):
        """Each call to _resize doubles the number of buckets."""
        self.assertEqual(1, len(self.hash_map.buckets))
        self.hash_map._resize()
        self.assertEqual(2, len(self.hash_map.buckets))
        self.hash_map._resize()
        self.assertEqual(4, len(self.hash_map.buckets))

    def test_insert(self):
        """Each insert grows the length, resizes the buckets, and keeps
        every previously inserted item findable."""
        extras = [Item('new key', 'new value'),
                  Item('newer key', 'newer value')]
        expected_bucket_counts = (1, 2, 4)
        stored = []
        for item, bucket_count in zip([self.item] + extras,
                                      expected_bucket_counts):
            self.hash_map.insert(item)
            stored.append(item)
            self.assertEqual(len(stored), len(self.hash_map))
            self.assertEqual(bucket_count, len(self.hash_map.buckets))
            # All items inserted so far remain members.
            for seen in stored:
                self.assertIn(seen, self.hash_map)
def test_remove_2(self):
    """
    Test remove() on a HashMap of capacity 0.
    :passed: yes
    """
    zero_capacity_map = HashMap(0, hash_function_1)
    # Print whatever remove() returns for a map with no buckets.
    print(zero_capacity_map.remove("key1"))
def test_remove_3(self):
    """
    Test remove() on an empty HashMap.
    :passed: yes
    """
    empty_map = HashMap(6, hash_function_1)
    # Print whatever remove() returns when the key was never stored.
    print(empty_map.remove("cat"))
def test_generate_hash_index_1(self):
    """
    Test generate_hash_index.
    :passed: yes
    """
    student_map = HashMap(6, hash_function_1)
    # "dog" is expected to land in bucket 2 of a 6-bucket map.
    print(student_map.generate_hash_index("dog"))
    self.assertEqual(2, student_map.generate_hash_index("dog"))
def test_compressor():
    """compressor() reduces a hash code modulo the map's capacity."""
    table = HashMap(3)
    code = table.hash("test")
    expected = code % 3
    assert table.compressor(code) == expected
def final_test(self):
    """End-to-end check of item assignment, exists(), item access, and
    delete() on HashMap."""
    # Renamed from `map` to avoid shadowing the builtin.
    hash_map = HashMap()
    hash_map[1] = 100
    hash_map[5] = 500
    hash_map[20] = 5
    hash_map["Hello"] = "Goodbye"
    assert hash_map.exists("Hello")
    assert hash_map["Hello"] == "Goodbye"
    hash_map.delete("Hello")
    # A deleted key reads back as None; `is None` is the idiomatic test.
    assert hash_map["Hello"] is None
def test_get(self):
    """Tests the HashMap get method"""
    pairs = [("test_5", 5), ("test_-5", -5), ("test_5_", 5),
             ("diff_word", 15), ("another_word", 20), ("set", 10),
             ("anotha_one", -7), ("completely_different", 5),
             ("getting_there", -1)]
    colliding = [("completely_different", 5), ("anotha_one", -7),
                 ("set", 10), ("another_word", 20)]
    first_key, first_val = colliding[0]
    last_key, last_val = colliding[3]

    student_map = HashMap(10, hash_function_1)
    # Load every pair into the table.
    for key, val in pairs:
        student_map.put(key, val)

    # Head and tail of the colliding bucket's linked list resolve.
    self.assertEqual(student_map.get(first_key), first_val)
    self.assertEqual(student_map.get(last_key), last_val)

    # Every key in the >2-collision bucket resolves.
    for key, val in colliding:
        self.assertEqual(student_map.get(key), val)

    # A key with no collision resolves.
    self.assertEqual(student_map.get("getting_there"), -1)

    # Every inserted pair is retrievable.
    for key, val in pairs:
        self.assertEqual(student_map.get(key), val)
def top_words(source, number):
    """
    Takes a plain text file and counts the number of occurrences
    of case insensitive words. Returns the top `number` of words in a
    list of tuples of the form (word, count).

    Args:
        source: the file name containing the text
        number: the number of top results to return (e.g. 5 would
                return the 5 most common words)

    Returns:
        A list of tuples of the form (word, count), sorted by most
        common word. (e.g. [("a", 23), ("the", 20), ("it", 10)])
    """
    keys = set()
    ht = HashMap(2500, hash_function_2)

    # Read the file one line at a time, tallying each lowercased word.
    with open(source) as f:
        for line in f:
            for w in rgx.findall(line):
                lw = w.lower()  # case-insensitive counting
                keys.add(lw)
                if ht.contains_key(lw):
                    # Seen before: bump the stored count by one.
                    new_value = ht.get(lw) + 1
                    ht.put(lw, new_value)
                else:
                    # First sighting: start the count at one.
                    ht.put(lw, 1)

    # Pull each (word, count) pair back out of the table's buckets.
    keys_list = []
    for word in keys:
        bucket_index = ht._hash_function(word) % ht.capacity
        node = ht._buckets[bucket_index].contains(word)
        keys_list.append((node.key, node.value))

    # Highest counts first, then return only the requested slice.
    keys_list.sort(key=lambda pair: pair[1], reverse=True)
    return keys_list[0:number]
def top_words(source, number):
    """
    Takes a plain text file and counts the number of occurrences
    of case insensitive words. Returns the top `number` of words in a
    list of tuples of the form (word, count).

    Args:
        source: the file name containing the text
        number: the number of top results to return (e.g. 5 would
                return the 5 most common words)

    Returns:
        A list of tuples of the form (word, count), sorted by most
        common word. (e.g. [("a", 23), ("the", 20), ("it", 10)])
    """
    ht = HashMap(2500, hash_function_2)

    # This block of code will read a file one word as a time and
    # put the word in `w`. It should be left as starter code.
    with open(source) as f:
        for line in f:
            words = rgx.findall(line)
            for w in words:
                # Lowercase once per word; the original recomputed
                # w.lower() up to four times per iteration.
                word = w.lower()
                if ht.contains_key(word):
                    ht.put(word, ht.get(word) + 1)
                else:
                    ht.put(word, 1)

    # sorted_tup() yields (word, count) pairs ordered by count;
    # keep only the requested number of entries.
    tup = ht.sorted_tup()
    return tup[:number]
def test_empty_buckets_2(self):
    """
    Test empty_buckets() with Example #2 from the guidelines.
    :passed: yes
    """
    print("--- EXAMPLE 2 ---")
    student_map = HashMap(50, hash_function_1)
    for index in range(150):
        student_map.put('key' + str(index), index * 100)
        # Report stats on every 30th insertion.
        if index % 30 == 0:
            print(student_map.empty_buckets(), student_map.size,
                  student_map.capacity)
def test_resize_table_1(self):
    """
    Test resize_table() with Example #1 from the guidelines.
    :passed: yes
    """
    print("--- EXAMPLE 1 ---")
    student_map = HashMap(20, hash_function_1)
    student_map.put('key1', 10)
    # Snapshot before and after the resize; the entry must survive it.
    print(student_map.size, student_map.capacity,
          student_map.get('key1'), student_map.contains_key('key1'))
    student_map.resize_table(30)
    print(student_map.size, student_map.capacity,
          student_map.get('key1'), student_map.contains_key('key1'))
def test_contains_key_3(self):
    """
    Test contains_key with an empty hash map.
    :passed: yes
    """
    # Build an empty hash map; no key can be present in it.
    empty_map = HashMap(3, hash_function_2)
    print(empty_map)
    for key in ("cat", " "):
        self.assertFalse(empty_map.contains_key(key))
def test_keys(self):
    """keys() reports every key stored via item assignment."""
    table = HashMap()
    table[1] = [100]
    table[2] = [200]
    table[3] = [300]
    table["Hello"] = "Goodbye"
    stored_keys = table.keys()
    for expected in ("Hello", 1, 2, 3):
        assert expected in stored_keys
def test_values(self):
    """values() reports every value stored via item assignment."""
    table = HashMap()
    table[1] = [100]
    table[2] = [200]
    table[3] = [300]
    table["Hello"] = "Goodbye"
    stored_values = table.values()
    for expected in ([100], [200], [300], "Goodbye"):
        assert expected in stored_values
def test_table_load_2(self):
    """
    Test table_load() with Example #2 from the guidelines.
    :passed: yes
    """
    print("--- EXAMPLE 2 ---")
    student_map = HashMap(50, hash_function_1)
    for index in range(50):
        student_map.put('key' + str(index), index * 100)
        # Report the load factor on every 10th insertion.
        if index % 10 == 0:
            print(student_map.table_load(), student_map.size,
                  student_map.capacity)
def test_get(self):
    """get() returns the stored object for a key, or None when absent."""
    table = HashMap(100)
    table.set('1', SampleObject('A'))
    expected = SampleObject('B')
    table.set('2', expected)
    table.set('3', SampleObject('C'))
    # The exact object set under '2' comes back; '4' was never set.
    self.assertEqual(expected, table.get('2'))
    self.assertEqual(None, table.get('4'))
def test_put_1(self):
    """
    Test put() on a hash map of capacity 0.
    :passed: yes
    """
    zero_capacity_map = HashMap(0, hash_function_1)
    print("map before put():", zero_capacity_map)
    zero_capacity_map.put("key1", 10)
    print("put('key1', 10):", zero_capacity_map)
    # Also show whatever put() itself returns.
    print(zero_capacity_map.put("key1", 10))
class Account(object):
    """Holds cash plus a HashMap of stock name -> number of shares."""

    def __init__(self):
        # Start with no cash and an empty stock map.
        self.cash = 0
        self.stocks = HashMap()

    def compare(self, other_acct):
        """Report cash and per-stock share differences against another
        account, one line per difference.

        Used in TransactionParser's reconcile() method.
        """
        other_stocks = other_acct.stocks
        combined_keys = list(set(self.stocks.keys() + other_stocks.keys()))
        lines = []
        for stock_name in combined_keys:
            delta = self.stock_diff(stock_name, other_stocks)
            if delta and delta != 0:
                lines.append(stock_name + " " + str(int(delta)))
        # Cash difference always leads the report.
        lines.insert(0, "Cash " + str(self.cash_diff(other_acct.cash)))
        return "\n".join(lines)

    def cash_diff(self, other_cash):
        """Difference between another account's cash and this one's."""
        return int(other_cash) - self.cash

    def stock_diff(self, key, declared_results):
        """Share-count difference for `key` between this account and the
        declared results; None when neither side holds the stock."""
        held = self.stocks[key]
        declared = declared_results[key]
        if held and declared:
            return float(declared) - held
        elif held:
            # Only we hold it: the other side is short by our count.
            return -1 * held
        elif declared and key != "Cash":
            # Only the declared results hold it.
            return float(declared)
        else:
            return None

    def set_stock(self, name, value):
        """Wrapper for setting a stock into self.stocks."""
        self.stocks[name] = value

    def get_stock(self, stock):
        """Wrapper for getting a stock, defaulting to 0 when absent."""
        return self.stocks.get(stock, 0)
def top_words(source, number):
    """
    Takes a plain text file and counts the number of occurrences
    of case insensitive words. Returns the top `number` of words in a
    list of tuples of the form (word, count).

    Args:
        source: the file name containing the text
        number: the number of top results to return (e.g. 5 would
                return the 5 most common words)

    Returns:
        A list of tuples of the form (word, count), sorted by most
        common word. (e.g. [("a", 23), ("the", 20), ("it", 10)])
    """
    keys = set()
    ht = HashMap(2500, hash_function_2)

    # Reads a file one word at a time.
    with open(source) as f:
        for line in f:
            for raw in rgx.findall(line):
                word = raw.lower()  # case-insensitive comparisons
                if ht.contains_key(word):
                    # Word already counted: bump its tally by one.
                    ht.put(word, ht.get(word) + 1)
                else:
                    # First sighting: start the tally at one.
                    ht.put(word, 1)
                keys.add(word)

    # Build (count, word) pairs so plain tuple sorting orders by count.
    count_array = [(ht.get(word), word) for word in keys]
    count_array = sorted(count_array, reverse=True)

    # Swap each pair back to (word, count) before returning.
    for i in range(len(count_array)):
        count_array[i] = (count_array[i][1], count_array[i][0])
    return count_array[:number]
def test_contains_key_4(self):
    """
    Test contains_key with an empty hash map.
    :passed: yes
    """
    # Build an empty, zero-capacity hash map.
    empty_map = HashMap(0, hash_function_2)
    print("hash_m:", empty_map)
    for key in ("blue", "a", " "):
        self.assertFalse(empty_map.contains_key(key))
    print(empty_map.contains_key("a"))
def test_assign():
    """assign() stores new pairs and overwrites an existing key's value."""
    table = HashMap(1)
    table.assign("key_1", "value_1")
    table.assign("key_2", "value_2")

    slot_1 = table.compressor(table.hash("key_1"))
    # In the single bucket's list, key_1's node sits behind the head.
    node_1 = table.array[slot_1].get_head_node().get_next_node()
    assert node_1.get_value() == ("key_1", "value_1")

    # Re-assigning an existing key replaces its value in place.
    table.assign("key_1", "test")
    node_1 = table.array[slot_1].get_head_node().get_next_node()
    assert node_1.get_value() == ("key_1", "test")

    slot_2 = table.compressor(table.hash("key_2"))
    head_value = table.array[slot_2].get_head_node().get_value()
    assert head_value == ("key_2", "value_2")
def test_clear_2(self):
    """
    Test clear() with Example #2 from the guidelines.
    :passed: yes
    """
    print("--- EXAMPLE 2 ---")
    student_map = HashMap(50, hash_function_1)

    def report():
        # Show size/capacity after each mutation.
        print(student_map.size, student_map.capacity)

    report()
    student_map.put('key1', 10)
    report()
    student_map.put('key2', 20)
    report()
    student_map.resize_table(100)
    report()
    student_map.clear()
    report()
def test_init(self):
    """Checks that the hash_table initializes correctly, if __init__ is provided, ignore"""
    student_map = HashMap(10, hash_function_1)
    # Capacity, size, and hash function must match the constructor args.
    expected = [(10, student_map.capacity),
                (0, student_map.size),
                (hash_function_1, student_map._hash_function)]
    for want, got in expected:
        self.assertEqual(want, got)
    # Every bucket must begin as an empty linked list.
    for bucket in student_map._buckets:
        self.assertIsNone(bucket.head)
def test_contains_key(self):
    """Tests the HashMap contains_key method"""
    pairs = [("test_5", 5), ("test_-5", -5), ("test_5_", 5),
             ("diff_word", 15), ("another_word", 20), ("set", 10),
             ("anotha_one", -7), ("completely_different", 5),
             ("getting_there", -1)]
    student_map = HashMap(10, hash_function_1)
    # After each put, contains_key must agree with a direct bucket scan.
    for key, value in pairs:
        student_map.put(key, value)
        located = False
        for bucket in student_map._buckets:
            if bucket.contains(key):
                located = True
        self.assertEqual(located, student_map.contains_key(key))
def test_put_and_get(self):
    """Item assignment stores values that item access returns unchanged."""
    table = HashMap()
    stored = {1: [100], "Hello": "Goodbye", 1.7: 45}
    for key, value in stored.items():
        table[key] = value
    for key, value in stored.items():
        assert table[key] == value
def add_words_to_hash_map(self, current_hash_map: HashMap, current_line: str):
    """Add every word of `current_line` to `current_hash_map`.

    Every 50 lines (tracked via self.line_count) the current map is
    archived in self.hash_maps and a fresh map is started. Returns the
    map that should receive the next line — either the one passed in or
    the fresh replacement.
    """
    if self.line_count >= 50:
        # Chunk boundary: archive the filled map and start a new one.
        self.line_count = 0
        self.hash_maps.append(current_hash_map)
        current_hash_map = HashMap()

    # Drop ALL empty/newline tokens, not just the first occurrence —
    # list.remove() deletes only one match, which previously left stray
    # '' tokens on lines containing repeated spaces.
    split_line = [token for token in current_line.split(' ')
                  if token not in ('', '\n')]

    for word in self.strip_punctuation(split_line):
        if word in self.punctuation:
            continue
        map_index = current_hash_map.hash_string(word)
        current_hash_map.add_to_hash_table(map_index, (word, 1))
    return current_hash_map
def top_words(source, number):
    """
    Takes a plain text file and counts the number of occurrences
    of case insensitive words. Returns the top `number` of words in a
    list of tuples of the form (word, count).

    Args:
        source: the file name containing the text
        number: the number of top results to return (e.g. 5 would
                return the 5 most common words)

    Returns:
        A list of tuples of the form (word, count), sorted by most
        common word. (e.g. [("a", 23), ("the", 20), ("it", 10)])
    """
    keys = set()
    ht = HashMap(2500, hash_function_2)

    # This block of code will read a file one word at a time and
    # put the word in `w`. It should be left as starter code.
    with open(source) as f:
        for line in f:
            words = rgx.findall(line)
            for w in words:
                # Lowercase before insertion for case-insensitive counts.
                w = w.lower()
                if ht.contains_key(w):
                    # Existing word: store an incremented count.
                    ht.put(w, ht.get(w) + 1)
                else:
                    # New word: create its entry with a count of one.
                    ht.put(w, 1)

    # Walk every bucket's linked list, collecting (word, count) tuples.
    for bucket in ht.get_buckets():
        node = bucket.head
        while node is not None:
            keys.add((node.key, node.value))
            node = node.next

    all_words = list(keys)
    # Ascending sort by count, then read the top `number` back to front.
    all_words.sort(key=lambda pair: pair[1])
    stop = number * -1 - 1
    return all_words[:stop:-1]
def top_words(source, number):
    """
    Takes a plain text file and counts the number of occurrences
    of case insensitive words. Returns the top `number` of words in a
    list of tuples of the form (word, count).

    Args:
        source: the file name containing the text
        number: the number of top results to return (e.g. 5 would
                return the 5 most common words)

    Returns:
        A list of tuples of the form (word, count), sorted by most
        common word. (e.g. [("a", 23), ("the", 20), ("it", 10)])
    """
    keys = set()
    ht = HashMap(2500, hash_function_2)

    # This block of code will read a file one word as a time and
    # put the word in `w`. It should be left as starter code.
    with open(source) as f:
        for line in f:
            words = rgx.findall(line)
            for w in words:
                # Lowercase once per word; the original recomputed
                # w.lower() up to four times per iteration.
                word = w.lower()
                if ht.contains_key(word):
                    # Existing word: update its count by one.
                    ht.put(word, ht.get(word) + 1)
                else:
                    # New word: store it with an initial count of one.
                    ht.put(word, 1)
                keys.add(word)

    # Pair each distinct word with its final count.
    list_of_occurences = [(key, ht.get(key)) for key in keys]

    # Source to help me find a way to implement this:
    # stackoverflow.com/questions/10695139/sort-a-list-of-tuples-by-2nd-item-integer-value
    # Sort by the count (second tuple element), descending.
    sorted_list = sorted(list_of_occurences, key=lambda x: x[1], reverse=True)
    return sorted_list[:number]
def setUp(self):
    """Create a fresh HashMap as the unit under test before each test."""
    self.uut = HashMap()
class HashMapTests(unittest.TestCase, DictTestCases):
    """Behavioural tests for HashMap: capacity, doubling threshold,
    collision handling, and deletion."""

    def setUp(self):
        self.uut = HashMap()

    def mock_hashes_to(self, index=0):
        """Build an object whose __hash__ always returns `index`,
        forcing deterministic bucket placement (and collisions)."""
        class Mock(object):
            def __hash__(self):
                return index

            def __str__(self):
                return "mock(%s)" % index
            __repr__ = __str__
        return Mock()

    def test_initial_current_capacity_is_16(self):
        self.assertEqual(16, self.uut.capacity())

    def test_initial_doubling_size_is_12(self):
        self.assertEqual(12, self.uut.doubling_size())

    def test_when_inialized_with_one_half_then_doubling_size_is_8(self):
        uut = HashMap(0.5)
        self.assertEqual(8, uut.doubling_size())

    def test_initial_len_is_0(self):
        self.assertEqual(0, len(self.uut))

    def test_insertion_increses_size_to_1(self):
        self.uut.insert(self.mock_hashes_to(), 42)

    def test_collisions_are_handled(self):
        # Two distinct objects with the same hash must both be stored.
        first = self.mock_hashes_to(1)
        second = self.mock_hashes_to(1)
        self.uut.insert(first, "spam")
        self.uut.insert(second, "eggs")
        self.assertEqual("spam", self.uut.get(first))
        self.assertEqual("eggs", self.uut.get(second))

    def test_inserting_items_with_a_higher_value_works(self):
        item = self.mock_hashes_to(99)
        self.uut.insert(item, 42)
        self.assertEqual(42, self.uut.get(item))

    def test_when_at_doubling_size_then_the_capacity_doubles(self):
        # `range` replaces Python-2-only `xrange`, which raises
        # NameError on Python 3; behaviour is otherwise identical.
        for i in range(11):
            self.uut.insert(i, "_")
        self.assertEqual(16, self.uut.capacity())
        self.uut.insert(12, "_")
        self.assertEqual(32, self.uut.capacity())
        self.assertEqual(12, len(self.uut))

    def test_len_is_0_after_delete_of_empty(self):
        self.uut.delete("foo")
        self.assertEqual(0, len(self.uut))

    def test_len_is_0_after_delete_of_only_item(self):
        self.uut.insert("foo", "_")
        self.uut.delete("foo")
        self.assertEqual(0, len(self.uut))

    def test_len_is_0_after_delete_of_only_item_twice(self):
        # Deleting the same key twice must not raise or corrupt length.
        self.uut.insert("foo", "_")
        self.uut.delete("foo")
        self.uut.delete("foo")
        self.assertEqual(0, len(self.uut))
def test_when_inialized_with_one_half_then_doubling_size_is_8(self):
    """HashMap(0.5) reports a doubling size of 8."""
    half_map = HashMap(0.5)
    self.assertEqual(8, half_map.doubling_size())
def test_hash_map(self):
    """End-to-end HashMap test: bulk put/get, missing-key errors,
    removal, and growth to 10,000 entries.

    Even i use a tuple key (i,) -> int value; odd i use a string key
    str(i) -> list value. The parity convention flips for the second
    insertion batch.
    """
    test_map = HashMap()
    # `==` replaces `is` for int comparisons: identity tests on ints
    # rely on CPython's small-int cache and emit SyntaxWarning on 3.8+.
    # `assertEqual` replaces the deprecated `assertEquals` alias.
    for i in range(0, 1000):
        if i % 2 == 0:
            test_map.put((i,), i)
        else:  # odd i
            test_map.put(str(i), [i])
    # Map initialized with 1000 kv pairs (the old comment said 1001).
    self.assertEqual(test_map.size(), 1000)
    for i in range(0, 1000):
        if i % 2 == 0:
            self.assertEqual(test_map.get((i,)), i)
        else:  # odd i
            self.assertEqual(test_map.get(str(i)), [i])

    # Keys stored under the other type/parity must raise KeyNotFound.
    with self.assertRaises(KeyNotFound):
        test_map.get("8")
    with self.assertRaises(KeyNotFound):
        test_map.get((991,))
    with self.assertRaises(KeyNotFound):
        test_map.get("test_key")
    with self.assertRaises(KeyNotFound):
        test_map.get((1002,))

    # Remove 700 elements.
    for i in range(100, 800):
        if i % 2 == 0:
            test_map.remove((i,))
        else:  # odd i
            test_map.remove(str(i))
    self.assertEqual(test_map.size(), 300)
    self.assertTrue(test_map.contains((80,)))
    self.assertFalse(test_map.contains((120,)))

    # Insert 9000 more values with the parity convention flipped.
    for i in range(1000, 10000):
        if i % 2 == 1:
            test_map.put((i,), i)
        else:  # even i
            test_map.put(str(i), [i])
    self.assertEqual(test_map.size(), 9300)
    for i in range(1000, 10000):
        if i % 2 == 1:
            self.assertEqual(test_map.get((i,)), i)
        else:  # even i
            self.assertEqual(test_map.get(str(i)), [i])