def test_table_load(self):
        """Verify table_load() on empty, lightly filled and heavily filled maps.

        Two maps are filled in lockstep; the second is created with twice the
        capacity of the first so their load factors can be compared.
        """
        base_capacity = 10
        doubled_capacity = base_capacity * 2
        pairs = [("test_5", 5), ("test_-5", -5), ("test_5_", 5),
                 ("diff_word", 15), ("another_word", 20)]

        small_map = HashMap(base_capacity, hash_function_1)
        large_map = HashMap(doubled_capacity, hash_function_1)
        # Nothing has been stored yet: 0 / 10 and 0 / 20 must agree.
        self.assertEqual(small_map.table_load(), large_map.table_load())

        for word, number in pairs:
            small_map.put(word, number)
            large_map.put(word, number)

        # Known loads: 5 / 10 = 0.5 and 5 / 20 = 0.25.
        expected_small = len(pairs) / base_capacity
        expected_large = len(pairs) / doubled_capacity
        self.assertEqual(expected_small, small_map.table_load())
        self.assertEqual(expected_large, large_map.table_load())

        # Start again with fresh maps and feed both the same 1000 random pairs.
        small_map = HashMap(base_capacity, hash_function_1)
        large_map = HashMap(doubled_capacity, hash_function_1)
        for _ in range(1000):
            word, number = create_random_tuple()
            small_map.put(word, number)
            large_map.put(word, number)

        # Same contents in half the capacity should give twice the load.
        self.assertAlmostEqual(small_map.table_load(),
                               large_map.table_load() * 2)
    def test_empty_buckets(self):
        """Verify empty_buckets() on fresh maps, after one put and after many puts."""
        pairs = [("test_5", 5), ("test_-5", -5), ("test_5_", 5),
                 ("diff_word", 15), ("another_word", 20), ("set", 10),
                 ("anotha_one", -7), ("completely_different", 5),
                 ("getting_there", -1)]

        # A new map reports every bucket as empty, for either capacity.
        capacity = 10
        fresh_map = HashMap(capacity, hash_function_1)
        self.assertEqual(capacity, fresh_map.empty_buckets())

        capacity = 20
        fresh_map = HashMap(capacity, hash_function_1)
        self.assertEqual(capacity, fresh_map.empty_buckets())

        # A single stored key should occupy exactly one bucket.
        fresh_map.put("first_value", 5)
        self.assertEqual(capacity - 1, fresh_map.empty_buckets())

        # Same data, same capacity, different hash functions.
        map_hf1 = HashMap(10, hash_function_1)
        map_hf2 = HashMap(10, hash_function_2)
        for word, number in pairs:
            map_hf1.put(word, number)
            map_hf2.put(word, number)

        # Expected: 5 empty buckets with hash_function_1, 4 with hash_function_2.
        self.assertEqual(5, map_hf1.empty_buckets())
        self.assertEqual(4, map_hf2.empty_buckets())
    def test_resize_table(self):
        """Tests resize_table: no keys are lost and capacity/size stay consistent.

        The same scenario runs twice on a freshly filled map of capacity 10:
        once growing the table (10 -> 50) and once shrinking it (10 -> 5).
        """
        test_values = [("test_5", 5), ("test_-5", -5), ("test_5_", 5),
                       ("diff_word", 15), ("another_word", 20), ("set", 10),
                       ("anotha_one", -7), ("completely_different", 5),
                       ("getting_there", -1)]

        # First pass resizes smaller -> bigger, second pass bigger -> smaller.
        for new_capacity in (50, 5):
            student_map = HashMap(10, hash_function_1)
            for key, value in test_values:
                student_map.put(key, value)
            self.assertEqual(10, student_map.capacity)

            # Snapshot of the state before resizing.
            keys_before_resize = get_keys_from_map(student_map)
            size_before_resize = student_map.size

            student_map.resize_table(new_capacity)
            self.assertEqual(new_capacity, student_map.capacity)

            # Snapshot of the state after resizing.
            keys_after_resize = get_keys_from_map(student_map)
            size_after_resize = student_map.size

            # No nodes may be lost or invented by the resize.
            self.assertEqual(len(keys_before_resize), len(keys_after_resize))
            self.assertEqual(size_before_resize, size_after_resize)
            # assertIn names the offending key when it fails.
            for key in keys_before_resize:
                self.assertIn(key, keys_after_resize)
            for key in keys_after_resize:
                self.assertIn(key, keys_before_resize)
	def test_load(self):
		"""Check load() after 100, 67 and 0 insertions into a HashMap(100)."""
		# Full load: 100 distinct keys.
		# range() is used instead of xrange() so the test also runs on Python 3.
		h = HashMap(100)
		for i in range(100):
			h.set(str(i), SampleObject(chr(i + 10)))
		self.assertEqual(h.load(), 1)

		# Partially full load: 67 distinct keys.
		h = HashMap(100)
		for i in range(67):
			h.set(str(i), SampleObject(chr(i + 10)))
		# The expected value is a non-integral float, so compare approximately.
		self.assertAlmostEqual(h.load(), 0.67)

		# Empty load: nothing inserted.
		h = HashMap(100)
		self.assertEqual(h.load(), 0)
    def test_contains_key_2(self):
        """
        Test contains_key on a capacity-7 map whose bucket 6 holds a hand-built chain.
        :passed: yes
        """
        # Build the chain; add_front is called for "cot", then "box", then "axe".
        chain = LinkedList()
        for word, number in (("cot", 3), ("box", 2), ("axe", 1)):
            chain.add_front(word, number)

        # Plant the chain directly into the last bucket of a new map.
        hash_m = HashMap(7, hash_function_2)
        hash_m._buckets[6] = chain

        # Keys stored in the chain must be reported as present.
        for present in ("axe", "box", "cot"):
            self.assertTrue(hash_m.contains_key(present))

        # Case variants and unrelated words must be reported as absent.
        for absent in ("Axe", "aXe", "axE", "AXE", "boxx", "cat", "verb"):
            self.assertFalse(hash_m.contains_key(absent))
# Example #6
# 0
def test_all():
    """Store EL_COUNT stringified integers as both key and value, then read each back."""
    hash_map = HashMap()
    labels = [str(x) for x in range(EL_COUNT)]
    # Fill the map completely before reading anything back.
    for label in labels:
        hash_map.put(label, label)
    for label in labels:
        assert hash_map.get(label) == label
def top_words(source, number):
    """
    Takes a plain text file and counts the number of occurrences of case insensitive words.
    Returns the top `number` of words in a list of tuples of the form (word, count).

    Args:
        source: the file name containing the text
        number: the number of top results to return (e.g. 5 would return the 5 most common words)
    Returns:
        A list of tuples of the form (word, count), sorted by most common word. (e.g. [("a", 23), ("the", 20), ("it", 10)])
        Words with equal counts are ordered alphabetically so the result is
        the same on every run.
    """
    ht = HashMap(2500, hash_function_2)
    keys = set()  # every distinct lowercase word seen in the file

    # Read the file line by line; `rgx` (defined elsewhere in this module)
    # extracts the words of each line.
    with open(source) as f:
        for line in f:
            for w in rgx.findall(line):
                lw = w.lower()  # counting is case-insensitive
                keys.add(lw)
                if ht.contains_key(lw):
                    ht.put(lw, ht.get(lw) + 1)
                else:
                    ht.put(lw, 1)

    # Read the counts back through the public get() rather than re-hashing
    # each word and reaching into the map's private buckets.
    counts = [(word, ht.get(word)) for word in keys]
    # Highest count first; ties are broken by word so that the order does not
    # depend on set iteration order.
    counts.sort(key=lambda pair: (-pair[1], pair[0]))
    return counts[:number]
    def test_put_3(self):
        """
        Exercise put() on a hash map of capacity 6 with new keys and
        overwrites, printing the map after every call.
        :passed: yes
        """
        hash_m = HashMap(6, hash_function_1)
        print("map before put():")
        print(hash_m)

        # Applied strictly in this order; a repeated key is an overwrite.
        operations = [
            ("key1", 10), ("box", 20), ("key2", 30), ("key3", 40),
            ("key4", 50), ("key5", 60), ("key2", 22), ("key5", 55),
            ("key6", 66), ("key1", 100), ("key3", 300), ("key6", 600),
            ("box", 1000),
        ]
        for key, value in operations:
            # put() is expected to return None every time.
            self.assertEqual(None, hash_m.put(key, value))
            # Produces labels of the form: put('key1', 10):
            print("put({!r}, {}):".format(key, value))
            print(hash_m)
    def test_put_2(self):
        """
        Exercise put() on a hash map of capacity 1, printing the map after
        every insertion and update.
        :passed: yes
        """
        hash_m = HashMap(1, hash_function_1)
        print("map before put():", hash_m)

        # Three inserts (key1 is updated right away), then key3, key2 and
        # key1 are each updated once more.
        steps = (
            ("key1", 10), ("key1", 11), ("key2", 20), ("key3", 30),
            ("key3", 31), ("key2", 21), ("key1", 12),
        )
        for key, value in steps:
            hash_m.put(key, value)
            # Produces labels of the form: put('key1', 10):  (trailing space kept)
            print("put({!r}, {}): ".format(key, value), hash_m)
    def test_get(self):
        """Verify get() for the first and last listed colliding key, the whole
        colliding group, a lone key, and finally every stored key."""
        all_pairs = [("test_5", 5), ("test_-5", -5), ("test_5_", 5),
                     ("diff_word", 15), ("another_word", 20), ("set", 10),
                     ("anotha_one", -7), ("completely_different", 5),
                     ("getting_there", -1)]

        # NOTE(review): these keys are presumed to share one bucket under
        # hash_function_1 with capacity 10 - confirm against the hash function.
        colliding_pairs = [("completely_different", 5), ("anotha_one", -7),
                           ("set", 10), ("another_word", 20)]
        head_key, head_value = colliding_pairs[0]
        tail_key, tail_value = colliding_pairs[3]

        table = HashMap(10, hash_function_1)
        for word, number in all_pairs:
            table.put(word, number)

        # Lookup of the entry treated as the chain head.
        self.assertEqual(table.get(head_key), head_value)

        # Lookup of the entry treated as the chain tail.
        self.assertEqual(table.get(tail_key), tail_value)

        # Every entry of the colliding group.
        for word, number in colliding_pairs:
            self.assertEqual(table.get(word), number)

        # A key expected to have no collision.
        self.assertEqual(table.get("getting_there"), -1)

        # Finally, every stored pair must be retrievable.
        for word, number in all_pairs:
            self.assertEqual(table.get(word), number)
# Example #11
# 0
def top_words(source, number):
    """
    Takes a plain text file and counts the number of occurrences of case insensitive words.
    Returns the top `number` of words in a list of tuples of the form (word, count).

    Args:
        source: the file name containing the text
        number: the number of top results to return (e.g. 5 would return the 5 most common words)
    Returns:
        A list of tuples of the form (word, count), sorted by most common word. (e.g. [("a", 23), ("the", 20), ("it", 10)])
    """
    ht = HashMap(2500, hash_function_2)

    # Read the file line by line; `rgx` (defined elsewhere in this module)
    # extracts the words of each line.
    with open(source) as f:
        for line in f:
            for w in rgx.findall(line):
                word = w.lower()  # lowercase once; counting is case-insensitive
                if ht.contains_key(word):
                    ht.put(word, ht.get(word) + 1)
                else:
                    ht.put(word, 1)

    # NOTE(review): sorted_tup() is a HashMap method not visible here; it is
    # presumed to return (word, count) tuples ordered by descending count.
    return ht.sorted_tup()[:number]
 def test_remove_3(self):
     """
     Call remove() for a key on a map that has had nothing put into it and
     print whatever it returns.
     :passed: yes
     """
     empty_map = HashMap(6, hash_function_1)
     outcome = empty_map.remove("cat")
     print(outcome)
 def test_remove_2(self):
     """
     Call remove() on a HashMap created with capacity 0 and print whatever
     it returns.
     :passed: yes
     """
     zero_capacity_map = HashMap(0, hash_function_1)
     outcome = zero_capacity_map.remove("key1")
     print(outcome)
 def test_generate_hash_index_1(self):
     """
     Check that generate_hash_index maps "dog" to index 2 on a map built
     with capacity 6 and hash_function_1.
     :passed: yes
     """
     table = HashMap(6, hash_function_1)
     word = "dog"
     # Show the computed index first, then assert on a second computation.
     print(table.generate_hash_index(word))
     self.assertEqual(2, table.generate_hash_index(word))
# Example #15
# 0
def test_compressor():
    """Check that compressor() returns the hash code modulo 3 for a HashMap(3)."""
    size = 3
    hash_map = HashMap(size)
    raw_code = hash_map.hash("test")
    expected = raw_code % size
    assert hash_map.compressor(raw_code) == expected
 def test_put_and_get(self):
     """Store values under int, str and float keys via item assignment and
     read each one back via item access."""
     # Named `table` rather than `hash` so the builtin hash() is not shadowed.
     table = HashMap()
     table[1] = [100]
     table["Hello"] = "Goodbye"
     table[1.7] = 45
     assert table[1] == [100]
     assert table["Hello"] == "Goodbye"
     assert table[1.7] == 45
 def test_init(self):
     """Checks that a new HashMap exposes the requested capacity, size 0, the
     given hash function and buckets whose head is None (ignore if __init__
     is provided)."""
     fresh_map = HashMap(10, hash_function_1)
     self.assertEqual(10, fresh_map.capacity)
     self.assertEqual(0, fresh_map.size)
     self.assertEqual(hash_function_1, fresh_map._hash_function)
     # Every bucket must start out with no head node.
     for chain in fresh_map._buckets:
         self.assertIsNone(chain.head)
	def test_get(self):
		"""get() returns the stored object for a present key and None for a missing one."""
		table = HashMap(100)
		table.set('1', SampleObject('A'))
		second = SampleObject('B')
		table.set('2', second)
		table.set('3', SampleObject('C'))

		# Key '2' was stored above, key '4' never was.
		found = table.get('2')
		self.assertEqual(found, second)
		missing = table.get('4')
		self.assertEqual(missing, None)
# Example #19
# 0
 def test_hash_map_updates_with_other_hash_map(self):
     """update() with another HashMap overwrites a shared key and adds a new one."""
     # NOTE(review): self.hash_map is presumably created in setUp - confirm.
     self.hash_map["test"] = "testing"

     incoming = HashMap()
     incoming["test"] = "testing123"
     incoming["another test"] = "another testing123"
     self.hash_map.update(incoming)

     self.assertEqual("testing123", self.hash_map["test"])
     expected_values = ["testing123", "another testing123"]
     self.assertEqual(expected_values, self.hash_map.values())
 def read_text(self):
     """Read the file at self.text_location into a new HashMap.

     Each line other than a bare newline is lowercased and passed to
     self.add_words_to_hash_map, and self.line_count is incremented for it.
     The resulting map is appended to self.hash_maps.
     """
     with open(self.text_location, 'r', encoding='utf-8') as text:
         word_map = HashMap()
         for line in text:
             # Lines consisting of just a newline are skipped and not counted.
             if line != '\n':
                 word_map = self.add_words_to_hash_map(word_map, line.lower())
                 self.line_count += 1
         self.hash_maps.append(word_map)
# Example #21
# 0
def top_words(source, number):
    """
    Takes a plain text file and counts the number of occurrences of case insensitive words.
    Returns the top `number` of words in a list of tuples of the form (word, count).

    Args:
        source: the file name containing the text
        number: the number of top results to return (e.g. 5 would return the 5 most common words)
    Returns:
        A list of tuples of the form (word, count), sorted by most common word. (e.g. [("a", 23), ("the", 20), ("it", 10)])
        Words with equal counts appear in reverse alphabetical order.
    """
    keys = set()  # every distinct lowercase word stored in ht
    ht = HashMap(2500, hash_function_2)

    # Read the file line by line; `rgx` (defined elsewhere in this module)
    # extracts the words of each line.
    with open(source) as f:
        for line in f:
            for w in rgx.findall(line):
                w = w.lower()  # counting is case-insensitive
                if ht.contains_key(w):
                    # Word already counted: bump its count.
                    ht.put(w, ht.get(w) + 1)
                else:
                    # First occurrence: start the count and remember the key.
                    ht.put(w, 1)
                    keys.add(w)

    # Sort (count, word) pairs from largest to smallest, so equal counts fall
    # back to the word itself, also descending.
    ranked = sorted(((ht.get(word), word) for word in keys), reverse=True)

    # Keep only the requested number of entries, flipped back to (word, count).
    return [(word, count) for count, word in ranked[:number]]
	def test_set(self):
		"""Check that set() updates count correctly and reports a full table.

		count must grow only when a new key is added, and set() must return
		True for the first 100 distinct keys of a HashMap(100) and False for
		the 101st.
		"""
		# count increments for a new key and stays put for an existing one.
		h = HashMap(100)
		self.assertEqual(h.count, 0)
		h.set('1', SampleObject('A'))
		self.assertEqual(h.count, 1)
		h.set('1', SampleObject('A2'))
		self.assertEqual(h.count, 1)
		h.set('2', SampleObject('B'))
		self.assertEqual(h.count, 2)

		# set() returns False once the table is full.
		# range() is used instead of xrange() so the test also runs on Python 3.
		h = HashMap(100)
		for i in range(100):
			added = h.set(str(i), SampleObject(chr(i + 10)))
			self.assertEqual(added, True)
		added = h.set(str(100), SampleObject(chr(100 + 10)))
		self.assertEqual(added, False)
	def test_change_val(self):
		"""Setting an existing key again replaces the object returned by get()."""
		table = HashMap(3)
		table.set('1', SampleObject('A'))
		original = SampleObject('B')
		table.set('2', original)
		self.assertEqual(table.get('2'), original)

		# Add a third key, then overwrite key '2' with a different object.
		table.set('3', SampleObject('C'))
		replacement = SampleObject('B2')
		table.set('2', replacement)
		self.assertEqual(table.get('2'), replacement)
 def final_test(self):
     """Store several keys, then check exists(), item access and delete() for "Hello".

     After delete("Hello"), item access for that key is expected to give None.
     """
     # Named `table` rather than `map` so the builtin map() is not shadowed.
     table = HashMap()
     table[1] = 100
     table[5] = 500
     table[20] = 5
     table["Hello"] = "Goodbye"
     assert table.exists("Hello")
     assert table["Hello"] == "Goodbye"
     table.delete("Hello")
     # Identity comparison is the idiomatic check for None.
     assert table["Hello"] is None
    def test_get_1(self):
        """
        Run get() through Example #1 and #2 from the guidelines, printing
        the results instead of asserting on them.
        :passed: yes
        """
        print("--- EXAMPLE 1 ---")
        m = HashMap(30, hash_function_1)
        print(m.get('key'))
        m.put('key1', 10)
        print(m.get('key1'))

        print("--- EXAMPLE 2 ---")
        m = HashMap(150, hash_function_2)
        for number in range(200, 300, 7):
            m.put(str(number), number * 10)
        print(m.size, m.capacity)
        for start in range(200, 300, 21):
            # `start` was inserted above (21 is a multiple of 7);
            # `start + 1` was never inserted.
            for probe in (start, start + 1):
                print(probe, m.get(str(probe)), m.get(str(probe)) == probe * 10)
 def test_put_4(self):
     """
     Run put() through Example #1 from the guidelines: 150 distinct keys
     into a map of capacity 50, printing statistics after every 25th put.
     :passed: yes
     """
     print("--- EXAMPLE 1 ---")
     m = HashMap(50, hash_function_1)
     for i in range(150):
         m.put('str' + str(i), i * 100)
         inserted_so_far = i + 1
         if inserted_so_far % 25 == 0:
             print(m.empty_buckets(), m.table_load(), m.size, m.capacity)
 def test_put_5(self):
     """
     Run put() through Example #2 from the guidelines: 50 puts into a map
     of capacity 40 where each key is used three times in a row (i // 3),
     printing statistics after every 10th put.
     :passed: yes
     """
     print("--- EXAMPLE 2 ---")
     m = HashMap(40, hash_function_2)
     for i in range(50):
         key = 'str' + str(i // 3)
         m.put(key, i * 100)
         if (i + 1) % 10 == 0:
             print(m.empty_buckets(), m.table_load(), m.size, m.capacity)
 def test_resize_table_1(self):
     """
     Run resize_table() through Example #1 from the guidelines: print the
     map's statistics for 'key1' before and after resizing from 20 to 30.
     :passed: yes
     """
     print("--- EXAMPLE 1 ---")
     m = HashMap(20, hash_function_1)
     m.put('key1', 10)

     def report():
         # Size, capacity, and both lookups for the single stored key.
         print(m.size, m.capacity, m.get('key1'), m.contains_key('key1'))

     report()
     m.resize_table(30)
     report()
 def test_table_load_2(self):
     """
     Run table_load() through Example #2 from the guidelines: 50 distinct
     keys into a map of capacity 50, printing statistics on every 10th index.
     :passed: yes
     """
     print("--- EXAMPLE 2 ---")
     m = HashMap(50, hash_function_1)
     for i in range(50):
         m.put('key' + str(i), i * 100)
         # Report at i = 0, 10, 20, 30, 40.
         if not i % 10:
             print(m.table_load(), m.size, m.capacity)
 def test_empty_buckets_2(self):
     """
     Run empty_buckets() through Example #2 from the guidelines: 150
     distinct keys into a map of capacity 50, printing statistics on every
     30th index.
     :passed: yes
     """
     print("--- EXAMPLE 2 ---")
     m = HashMap(50, hash_function_1)
     for i in range(150):
         m.put('key' + str(i), i * 100)
         # Report at i = 0, 30, 60, 90, 120.
         if not i % 30:
             print(m.empty_buckets(), m.size, m.capacity)