def __init__(self, objs=(), hash_type=HashType.QUADRATIC, size=10, c1=1, c2=1):
    """Create the bucket storage and optionally load an initial set of items.

    Args:
        objs: Iterable of items to insert after the table is built. If any
            item maps to a negative value (see ``HashTable.to_value``) a
            message is printed and no item is inserted.
        hash_type: ``HashType`` member selecting the collision strategy.
        size: Requested number of buckets. For ``HashType.QUADRATIC`` it is
            rounded *up* to the next power of two.
        c1: Stored on the instance as ``self.c1``.
        c2: Stored on the instance as ``self.c2``.
    """
    from math import ceil

    # Materialize once so that one-shot iterables survive the two passes
    # below (validation, then insertion).
    objs = list(objs) if objs is not None else []

    if hash_type is HashType.QUADRATIC:
        # Integer arithmetic instead of log(size) / log(2): the float
        # quotient overshoots for some exact powers of two (2**29 yields
        # 29.000000000000004, which would double the table) and raises for
        # size <= 0.
        self.size = 1 << max(int(ceil(size)) - 1, 0).bit_length()
    else:
        self.size = size

    self.item_count = 0
    self.table = []

    # Set every attribute before any early exit so the instance is always
    # fully initialized, even when the initial items are rejected.
    self.hash_type = hash_type
    self.c1 = c1
    self.c2 = c2

    if hash_type is HashType.CHAIN:
        # Chained tables hold an independent list in every bucket.
        self.table.extend([] for _ in range(self.size))
    else:
        # Open addressing: an AVL tree records which bucket numbers are
        # occupied, and every bucket starts out as "never used".
        self.index = AVL()
        self.table.extend([EmptyType.EMPTY_SINCE_START] * self.size)

    if any(HashTable.to_value(obj) < 0 for obj in objs):
        print("HashTable only takes positive values.")
        return

    for obj in objs:
        # Items whose string form is empty are skipped.
        if len(str(obj)) > 0:
            self.insert(obj)
def plot_times(filename="English.txt", start=500, stop=5500, step=500):
    """Vary n from 'start' to 'stop', incrementing by 'step'. At each
    iteration, use the create_word_list() from the 'WordList' module to
    generate a list of n randomized words from the specified file.

    Time (separately) how long it takes to load a LinkedList, a BST, and an
    AVL with the data set.

    Choose 5 random words from the data set. Time how long it takes to find
    each word in each object. Calculate the average search time for each
    object.

    Create one plot with two subplots. In the first subplot, plot the number
    of words in each dataset against the build time for each object. In the
    second subplot, plot the number of words against the search time for each
    object.

    Inputs:
        filename (str): the file to use in creating the data sets.
        start (int): the lower bound on the sample interval.
        stop (int): the upper bound on the sample interval.
        step (int): the space between points in the sample interval.

    Returns:
        Show the plot, but do not return any values.
    """
    # Floor division keeps the sample count an integer on Python 3 (linspace
    # rejects a float count); a platform int avoids the int16 overflow for
    # sizes above 32767.
    sample_count = (stop - start) // step + 1
    n_list = np.linspace(start, stop, sample_count).astype(int)
    word_list = create_word_list(filename)

    load_list, load_BST, load_AVL = [], [], []
    find_list, find_BST, find_AVL = [], [], []

    for n in n_list:
        words = word_list[:n]
        # Index into the slice actually taken, so a file with fewer than n
        # words cannot produce an out-of-range pick.
        picks = np.random.randint(0, len(words), size=5)
        words_to_find = [words[i] for i in picks]

        linked = LinkedList()
        bst = BST()
        avl = AVL()

        # Build times. The timer variable is deliberately not called
        # 'start', which is a parameter of this function.
        t0 = time()
        for word in words:
            linked.add(word)
        load_list.append(time() - t0)

        t0 = time()
        for word in words:
            bst.insert(word)
        load_BST.append(time() - t0)

        t0 = time()
        for word in words:
            avl.insert(word)
        load_AVL.append(time() - t0)

        # Search times: store the per-word average, as the docstring
        # requires, rather than the total for all five look-ups.
        t0 = time()
        for word in words_to_find:
            iterative_search(linked, word)
        find_list.append((time() - t0) / len(words_to_find))

        t0 = time()
        for word in words_to_find:
            bst.find(word)
        find_BST.append((time() - t0) / len(words_to_find))

        t0 = time()
        for word in words_to_find:
            avl.find(word)
        find_AVL.append((time() - t0) / len(words_to_find))

    plt.subplot(121)
    plt.plot(n_list, load_list, label='Singly-Linked List')
    plt.plot(n_list, load_BST, label='Binary Search Tree')
    plt.plot(n_list, load_AVL, label='AVL Tree')
    plt.legend()
    plt.xlabel('Data Points')
    plt.ylabel('Seconds')
    plt.title('Build Times')

    plt.subplot(122)
    plt.plot(n_list, find_list, label='Singly-Linked List')
    plt.plot(n_list, find_BST, label='Binary Search Tree')
    plt.plot(n_list, find_AVL, label='AVL Tree')
    plt.legend()
    plt.xlabel('Data Points')
    plt.ylabel('Seconds')
    plt.title('Search Times')

    plt.show()
class HashTable:
    """Hash table with chaining, linear probing or quadratic probing.

    Chained tables keep a list in every bucket. Open-addressed tables
    (linear/quadratic) keep one item per bucket, mark unused buckets with
    ``EmptyType`` sentinels, and track occupied bucket numbers in an AVL tree
    stored in ``self.index``.

    NOTE(review): the key an item was inserted under is not stored. Look-ups
    in open-addressed tables therefore stop at the first bucket that is not
    marked "removed" and never compare the stored item with the one sought.
    """

    def __init__(self, objs=(), hash_type=HashType.QUADRATIC, size=10, c1=1, c2=1):
        """Create the bucket storage and optionally load initial items.

        Args:
            objs: Iterable of items to insert. If any item maps to a negative
                value a message is printed and nothing is inserted.
            hash_type: ``HashType`` member selecting the collision strategy.
            size: Requested number of buckets. For quadratic probing it is
                rounded *up* to the next power of two.
            c1: Linear coefficient of the quadratic probe sequence.
            c2: Quadratic coefficient of the quadratic probe sequence.
        """
        from math import ceil

        # Materialize once so one-shot iterables survive both passes below.
        objs = list(objs) if objs is not None else []

        if hash_type is HashType.QUADRATIC:
            # Integer arithmetic instead of log(size) / log(2): the float
            # quotient overshoots for some exact powers of two (2**29 yields
            # 29.000000000000004) and raises for size <= 0.
            self.size = 1 << max(int(ceil(size)) - 1, 0).bit_length()
        else:
            self.size = size

        self.item_count = 0
        self.table = []

        # Set every attribute before any early exit so the instance is
        # usable even when the initial items are rejected.
        self.hash_type = hash_type
        self.c1 = c1
        self.c2 = c2

        if hash_type is HashType.CHAIN:
            self.table.extend([] for _ in range(self.size))
        else:
            self.index = AVL()
            self.table.extend([EmptyType.EMPTY_SINCE_START] * self.size)

        if any(HashTable.to_value(obj) < 0 for obj in objs):
            print("HashTable only takes positive values.")
            return

        for obj in objs:
            # Items whose string form is empty are skipped.
            if len(str(obj)) > 0:
                self.insert(obj)

    @staticmethod
    def to_value(obj):
        """Map ``obj`` to the integer that gets hashed.

        Integers are returned unchanged. Anything else is converted with
        ``str`` and summed character by character: digits add their numeric
        value, letters add their code point, other characters add nothing.
        """
        if isinstance(obj, int):
            return obj
        total = 0
        for ch in str(obj):
            if ch.isdigit():
                total += int(ch)
            elif ch.isalpha():
                total += ord(ch)
        return total

    def calculate_hash(self, value, mode="insert"):
        """Return the bucket number for ``value``.

        Chained tables always return ``value % size``. For open addressing:

        * ``mode="insert"`` probes until it reaches a free bucket (never used
          or emptied by a removal) and returns it.
        * ``mode="search"`` skips buckets emptied by a removal and returns the
          first other bucket, or ``None`` if that bucket was never used or
          ``size`` probes were exhausted.

        Any other mode or hash type returns ``None``.
        """
        if self.hash_type == HashType.CHAIN:
            return value % self.size

        # Linear probing is the quadratic formula with c1 = 1 and c2 = 0.
        if self.hash_type == HashType.LINEAR:
            c1, c2 = 1, 0
        elif self.hash_type == HashType.QUADRATIC:
            c1, c2 = self.c1, self.c2
        else:
            return None

        def bucket_at(i):
            return (value + c1 * i + c2 * i ** 2) % self.size

        if mode == "insert":
            i = 0
            while True:
                bucket = bucket_at(i)
                slot = self.table[bucket]
                if slot is EmptyType.EMPTY_SINCE_START or \
                        slot is EmptyType.EMPTY_AFTER_REMOVAL:
                    return bucket
                i += 1

        if mode == "search":
            for i in range(self.size):
                bucket = bucket_at(i)
                if self.table[bucket] is not EmptyType.EMPTY_AFTER_REMOVAL:
                    break
            else:
                # Every probed bucket was a removal marker.
                return None
            # A never-used bucket ends the search as a miss. Linear probing
            # previously returned such a bucket as if it were a hit, unlike
            # the quadratic branch.
            if self.table[bucket] is EmptyType.EMPTY_SINCE_START:
                return None
            return bucket

        return None

    def insert(self, obj, value=None):
        """Insert ``obj``, hashing ``value`` or, if omitted, ``to_value(obj)``.

        Items whose derived value is negative are rejected with a printed
        message. Open-addressed tables are expanded first when they are too
        full (half full for quadratic probing, two-thirds full otherwise).
        """
        if value is None:
            # Derive the key before comparing; the original compared the
            # still-None 'value' with 0, which raises TypeError on Python 3.
            value = self.to_value(obj)
            if value < 0:
                print("Hash table only accepts positive values.")
                return

        # Quadratic probing only guarantees a free bucket up to half load;
        # chained tables never need to grow.
        if self.hash_type is HashType.QUADRATIC and self.item_count * 2 >= self.size:
            self.expand()
        elif (self.hash_type is not HashType.CHAIN) and self.item_count * 1.5 >= self.size:
            self.expand()

        bucket = self.calculate_hash(value)
        if self.hash_type is HashType.CHAIN:
            self.table[bucket].append(obj)
        else:
            self.table[bucket] = obj
            self.index.add(bucket)
        self.item_count += 1

    def _occupied_buckets(self):
        """Return the bucket numbers recorded in the AVL index, in order."""
        self.index.inorder()
        return [node.value for node in self.index.ordered_nodes if node is not None]

    def expand(self, multiplication_factor=2):
        """Grow the table by ``multiplication_factor`` and re-insert all items.

        NOTE(review): open-addressed items are re-inserted under their
        ``number`` attribute when they have one, otherwise under their old
        bucket number, because the original key is not stored.
        """
        old_table = list(self.table)
        is_chain = self.hash_type is HashType.CHAIN
        # Collect occupied buckets before the index is cleared.
        occupied = [] if is_chain else self._occupied_buckets()

        self.table.clear()
        # Re-insertion below counts the items again.
        self.item_count = 0
        self.size *= multiplication_factor

        if is_chain:
            # Chained tables have no index, so there is nothing to clear.
            self.table.extend([] for _ in range(self.size))
            for old_bucket in old_table:
                for item in old_bucket:
                    self.insert(item)
        else:
            self.index.clear()
            self.table.extend([EmptyType.EMPTY_SINCE_START] * self.size)
            for num in occupied:
                item = old_table[num]
                try:
                    self.insert(item, item.number)
                except (AttributeError, TypeError):
                    # No usable 'number' attribute: fall back to the old slot.
                    self.insert(item, num)

    def remove(self, obj):
        """Remove ``obj`` from the table, printing a message if it is absent."""
        value = HashTable.to_value(obj)
        if value < 0:
            print("Hash table values cannot be negative.")
            return

        bucket = self.calculate_hash(value, "search")
        if self.hash_type == HashType.CHAIN:
            chain = self.table[bucket]
            for position, item in enumerate(chain):
                if item == obj:
                    # Stop after the first match: continuing an index loop
                    # over the shortened list used to raise IndexError.
                    chain.pop(position)
                    self.item_count -= 1
                    return
            print("Object not found, cannot remove.")
        elif bucket is not None and self.index.search(bucket) is not None:
            self.table[bucket] = EmptyType.EMPTY_AFTER_REMOVAL
            self.index.remove(self.index.search(bucket))
            self.item_count -= 1
        else:
            print("Object not found, cannot remove.")

    def search(self, value):
        """Return the content of the bucket ``value`` maps to, or ``None``.

        For chained tables the whole bucket list is returned.
        """
        bucket = self.calculate_hash(value, "search")
        if bucket is None:
            return None
        return self.table[bucket]

    def is_in_table(self, obj):
        """Report whether a look-up for ``obj`` succeeds.

        Chained tables check membership in the bucket. Open-addressed tables
        report whether the search reached an occupied bucket.
        """
        bucket = self.calculate_hash(HashTable.to_value(obj), "search")
        if self.hash_type == HashType.CHAIN:
            return obj in self.table[bucket]
        # 'None > -1' raises TypeError on Python 3, so test for None directly.
        return bucket is not None

    def return_entire_table(self):
        """Return all stored items as a flat list."""
        if self.hash_type == HashType.CHAIN:
            return [item for bucket in self.table for item in bucket]
        return [self.table[number] for number in self._occupied_buckets()]

    def get_index(self):
        """Return the occupied bucket numbers of an open-addressed table.

        Chained tables have no index: a message is printed and ``None`` is
        returned.
        """
        if self.hash_type != HashType.CHAIN:
            return self._occupied_buckets()
        print("Chained hash tables don't have indexes.")
        return
def plot_times(filename="English.txt", start=500, stop=5500, step=500):
    """Vary n from 'start' to 'stop', incrementing by 'step'. At each
    iteration, use the create_word_list() from the 'WordList' module to
    generate a list of n randomized words from the specified file.

    Time (separately) how long it takes to load a LinkedList, a BST, and an
    AVL with the data set.

    Choose 5 random words from the data set. Time how long it takes to find
    each word in each object. Calculate the average search time for each
    object.

    Create one plot with two subplots. In the first subplot, plot the number
    of words in each dataset against the build time for each object. In the
    second subplot, plot the number of words against the search time for each
    object.

    Inputs:
        filename (str): the file to use in creating the data sets.
        start (int): the lower bound on the sample interval.
        stop (int): the upper bound on the sample interval.
        step (int): the space between points in the sample interval.

    Returns:
        Show the plot, but do not return any values.
    """
    # NOTE(review): 'filename' is never forwarded to create_word_list();
    # confirm whether the WordList helper accepts it before wiring it in.

    def time_once(func, *args):
        """Time a single call of func(*args) with timeit."""
        return timeit.timeit(lambda: func(*args), number=1)

    def load_with_add(container, words):
        for word in words:
            container.add(word)

    def load_with_insert(container, words):
        for word in words:
            container.insert(word)

    def average_find_time(container, words):
        """Mean time of five look-ups of randomly chosen words."""
        samples = [time_once(container.find, random.choice(words))
                   for _ in range(5)]
        return np.mean(samples)

    def benchmark(container, loader):
        """Return (build_times, find_times) for every sample size."""
        build_times = []
        find_times = []
        for size in x_values:
            words = word_list[:int(size)]
            build_times.append(time_once(loader, container, words))
            find_times.append(average_find_time(container, words))
            # Reuse the same container for the next, larger sample.
            container.clear()
        return build_times, find_times

    word_list = WordList.create_word_list()
    word_list = np.random.permutation(word_list)
    x_values = list(range(start, stop, step))

    # Each structure is benchmarked over all sizes before the next one.
    list_times, find_list = benchmark(LinkedList(), load_with_add)
    bst_times, find_bst = benchmark(BST(), load_with_insert)
    avl_times, find_avl = benchmark(AVL(), load_with_insert)

    plt.subplot(121)
    plt.plot(x_values, list_times, label='Linked List')
    plt.plot(x_values, bst_times, label='BST')
    plt.plot(x_values, avl_times, label='AVL')
    plt.legend(loc='upper left')
    plt.xlabel('data points')
    plt.ylabel('seconds')

    plt.subplot(122)
    plt.plot(x_values, find_list, label='Linked List')
    plt.plot(x_values, find_bst, label='BST')
    plt.plot(x_values, find_avl, label='AVL')
    plt.legend(loc='upper left')
    plt.xlabel('data points')
    plt.ylabel('seconds')

    # Nothing may follow show(): a pyplot call after it would act on a new,
    # empty figure.
    plt.show()
def plot_times(filename="English.txt", start=500, stop=5500, step=500):
    """Vary n from 'start' to 'stop', incrementing by 'step'. At each
    iteration, use the create_word_list() from the 'WordList' module to
    generate a list of n randomized words from the specified file.

    Time (separately) how long it takes to load a LinkedList, a BST, and an
    AVL with the data set.

    Choose 5 random words from the data set. Time how long it takes to find
    each word in each object. Calculate the average search time for each
    object.

    Create one plot with two subplots. In the first subplot, plot the number
    of words in each dataset against the build time for each object. In the
    second subplot, plot the number of words against the search time for each
    object.

    Inputs:
        filename (str): the file to use in creating the data sets.
        start (int): the lower bound on the sample interval.
        stop (int): the upper bound on the sample interval.
        step (int): the space between points in the sample interval.

    Returns:
        Show the plot, but do not return any values.

    Raises:
        ValueError: if 'step' does not divide the distance from 'start' to
            'stop'.
    """
    def average_search_time(search, words):
        """Mean wall-clock time of calling search(word) for each word."""
        if len(words) == 0:
            return 0.0
        total = 0
        for word in words:
            begin = time.time()
            search(word)
            total += time.time() - begin
        return total / len(words)

    # Fail fast, before the word file is read.
    if (stop - start) % step != 0:
        raise ValueError("Your steps won't get you from start to stop")

    word_list = create_word_list(filename)

    time_linked_list = []
    time_BST_list = []
    time_AVL_list = []
    time_linked_list_search = []
    time_BST_list_search = []
    time_AVL_list_search = []
    set_size = []

    # The validation above guarantees the steps land exactly on 'stop', so
    # 'stop' itself is sampled too.
    for current in range(start, stop + 1, step):
        current_linked_list = LinkedList()
        current_BST = BST()
        current_AVL = AVL()
        current_list = word_list[:current]
        # Five random words, as the docstring specifies. Searching every
        # word made the linked-list measurement quadratic in n.
        to_search = np.random.permutation(current_list)[:5]

        begin = time.time()
        for word in current_list:
            current_linked_list.add(word)
        time_linked_list.append(time.time() - begin)

        begin = time.time()
        for word in current_list:
            current_BST.insert(word)
        time_BST_list.append(time.time() - begin)

        begin = time.time()
        for word in current_list:
            current_AVL.insert(word)
        time_AVL_list.append(time.time() - begin)

        time_linked_list_search.append(average_search_time(
            lambda word: iterative_search(current_linked_list, word), to_search))
        time_BST_list_search.append(
            average_search_time(current_BST.find, to_search))
        time_AVL_list_search.append(
            average_search_time(current_AVL.find, to_search))

        set_size.append(current)

    plt.subplot(2, 1, 1)
    plt.title('Building Data Structures')
    plt.plot(set_size, time_linked_list, label='Linked List', linewidth=3)
    plt.plot(set_size, time_BST_list, label="BST", linewidth=3)
    plt.plot(set_size, time_AVL_list, label="AVL", linewidth=3)
    plt.legend(loc=2)

    plt.subplot(2, 1, 2)
    plt.title("Searching Data Structures")
    plt.plot(set_size, time_linked_list_search, label='Linked list', linewidth=3)
    plt.plot(set_size, time_BST_list_search, label='BST', linewidth=3)
    plt.plot(set_size, time_AVL_list_search, label='AVL', linewidth=3)
    plt.legend(loc=2)

    plt.show()
def timings():
    """Time building and searching a LinkedList, a BST and an AVL tree.

    For each sample size from 500 to 5000 in steps of 500, a word list is
    obtained from create_word_list(), each structure is built from it, and
    five randomly chosen words are looked up in each structure. The results
    are plotted (build times on the left, mean search times on the right;
    red = linked list, green = BST, blue = AVL).

    Returns:
        tuple of six lists, one entry per sample size:
        (ll_add, ll_search, bst_add, bst_search, avl_add, avl_search).
    """
    ll_add, bst_add, avl_add = [], [], []
    ll_search, bst_search, avl_search = [], [], []

    def timed_build(add, words):
        """Return the time taken to pass every word to add()."""
        before = time.time()
        for word in words:
            add(word)
        return time.time() - before

    def mean_search_time(search, targets):
        """Return the mean time of calling search(target) per target."""
        samples = []
        for target in targets:
            before = time.time()
            search(target)
            samples.append(time.time() - before)
        return sum(samples) / len(samples)

    for items in range(500, 5500, 500):
        wordlist = create_word_list(items)
        words = wordlist[:items]

        # randint excludes its upper bound. The previous random_integers
        # call included it and could index one past the end of the list.
        random_indices = np.random.randint(0, len(words), 5)
        targets = [words[i] for i in random_indices]

        ll = LinkedList()
        ll_add.append(timed_build(ll.add_node, words))
        ll_search.append(mean_search_time(
            lambda word: iterative_search(ll, word), targets))

        bst = BST()
        bst_add.append(timed_build(bst.insert, words))
        bst_search.append(mean_search_time(bst.find, targets))

        avl = AVL()
        avl_add.append(timed_build(avl.insert, words))
        avl_search.append(mean_search_time(avl.find, targets))

    plt.subplot(1, 2, 1)
    plt.plot(ll_add, "r")
    plt.plot(bst_add, "g")
    plt.plot(avl_add, "b")

    plt.subplot(1, 2, 2)
    plt.plot(ll_search, "r")
    plt.plot(bst_search, "g")
    plt.plot(avl_search, "b")

    plt.show()
    plt.close()
    return ll_add, ll_search, bst_add, bst_search, avl_add, avl_search
def plot_times(filename="English.txt", start=500, stop=5500, step=500):
    """Vary n from 'start' to 'stop', incrementing by 'step'. At each
    iteration, use the create_word_list() from the 'WordList' module to
    generate a list of n randomized words from the specified file.

    Time (separately) how long it takes to load a LinkedList, a BST, and an
    AVL with the data set.

    Choose 5 random words from the data set. Time how long it takes to find
    each word in each object. Calculate the average search time for each
    object.

    Create one plot with two subplots. In the first subplot, plot the number
    of words in each dataset against the build time for each object. In the
    second subplot, plot the number of words against the search time for each
    object.

    Inputs:
        filename (str): the file to use in creating the data sets.
        start (int): the lower bound on the sample interval.
        stop (int): the upper bound on the sample interval (exclusive).
        step (int): the space between points in the sample interval.

    Returns:
        Shows the plot and returns a tuple of six lists, one entry per
        sample size:
        (ll_add, ll_search, bst_add, bst_search, avl_add, avl_search).
    """
    # NOTE(review): 'filename' is never forwarded to create_word_list();
    # confirm the helper's signature before wiring it in.
    ll_add, bst_add, avl_add = [], [], []
    ll_search, bst_search, avl_search = [], [], []
    sizes = list(range(start, stop, step))

    def timed_build(add, words):
        """Return the time taken to pass every word to add()."""
        before = time.time()
        for word in words:
            add(word)
        return time.time() - before

    def mean_search_time(search, targets):
        """Return the mean time of calling search(target) per target."""
        samples = []
        for target in targets:
            before = time.time()
            search(target)
            samples.append(time.time() - before)
        return sum(samples) / len(samples)

    for items in sizes:
        # Fetched anew for every size, as before, in case the helper
        # reshuffles on each call.
        wordlist = create_word_list()[:items]

        # randint excludes its upper bound. The previous random_integers
        # call included it and could index one past the end of the list.
        random_indices = np.random.randint(0, len(wordlist), 5)
        targets = [wordlist[i] for i in random_indices]

        ll = LinkedList()
        ll_add.append(timed_build(ll.add, wordlist))
        ll_search.append(mean_search_time(
            lambda word: iterative_search(ll, word), targets))

        bst = BST()
        bst_add.append(timed_build(bst.insert, wordlist))
        bst_search.append(mean_search_time(bst.find, targets))

        avl = AVL()
        avl_add.append(timed_build(avl.insert, wordlist))
        avl_search.append(mean_search_time(avl.find, targets))

    # Plot against the number of words, not the position in the result list.
    plt.subplot(1, 2, 1)
    plt.title("Build Times")
    plt.plot(sizes, ll_add, "b", label="Single-Linked List")
    plt.plot(sizes, bst_add, "g", label="Binary Search Tree")
    plt.plot(sizes, avl_add, "r", label="AVL Tree")
    plt.legend(loc="upper left")

    plt.subplot(1, 2, 2)
    plt.title("Search Times")
    plt.plot(sizes, ll_search, "b", label="Single-Linked List")
    plt.plot(sizes, bst_search, "g", label="Binary Search Tree")
    plt.plot(sizes, avl_search, "r", label="AVL Tree")
    plt.legend(loc="upper left")

    plt.show()
    plt.close()
    return ll_add, ll_search, bst_add, bst_search, avl_add, avl_search