def testContains(self):
    """Check that ``contains`` reports stored keys and rejects a missing one.

    Three keys are added in the order 'B', 'A', 'C'. The test first probes
    for 'D', which was never added, and then for each stored key.
    """
    stored_keys = ('B', 'A', 'C')

    # Build the tree under test.
    bst = BinarySearchTree()
    for key in stored_keys:
        bst.add(key)

    # A key that was never inserted must not be found.
    self.assertFalse(bst.contains('D'))

    # Probe order matches insertion order: 'B' is expected at the root,
    # 'A' as the root's left child and 'C' as the root's right child.
    for key in stored_keys:
        self.assertTrue(bst.contains(key))
def randomRemoveTests(self):
    """Randomised add/contains/remove round-trip over growing tree sizes.

    For every size from 0 up to ``self.LOOPS - 1`` a fresh tree is filled
    from ``self.genRandList(size)``. The test checks that the length grows
    by one per insertion, that every value is found after the list has been
    shuffled, that each removal makes the value unfindable and shrinks the
    length by one, and that the emptied tree is falsy.
    """
    for size in range(self.LOOPS):
        tree = BinarySearchTree()
        values = self.genRandList(size)

        # Insert everything, checking the running element count as we go.
        for expected_len, item in enumerate(values, start=1):
            tree.add(item)
            self.assertEqual(len(tree), expected_len)

        # Visit the values in a different order than they were inserted.
        random.shuffle(values)

        for pos in range(size):
            self.assertTrue(tree.contains(values[pos]))

        # Remove all the elements we just placed in the tree.
        remaining = size
        for pos in range(size):
            item = values[pos]
            tree.remove(item)
            remaining -= 1
            self.assertFalse(tree.contains(item))
            self.assertEqual(len(tree), remaining)

        # An emptied tree must evaluate as falsy.
        self.assertFalse(tree)
def randomRemoveTests(self):
    """Randomised removal test over trees of increasing size.

    For every size from 0 up to ``self.LOOPS - 1`` a fresh tree is filled
    from ``self.genRandList(size)``. The values are then removed in shuffled
    order; each ``remove`` must return a truthy result, leave the value
    unfindable and reduce ``size()`` by one. The tree must report
    ``isEmpty()`` once everything has been removed.
    """
    for count in range(self.LOOPS):
        tree = BinarySearchTree()
        values = self.genRandList(count)

        for item in values:
            tree.add(item)

        # Remove in a different order than the insertion order.
        random.shuffle(values)

        # Remove all the elements we just placed in the tree.
        remaining = count
        for pos in range(count):
            item = values[pos]
            self.assertTrue(tree.remove(item))
            remaining -= 1
            self.assertFalse(tree.contains(item))
            self.assertEqual(tree.size(), remaining)

        self.assertTrue(tree.isEmpty())
f = open('names_2.txt', 'r') names_2 = f.read().split("\n") # List containing 10000 names f.close() duplicates = [] # Return the list of duplicates in this data structure # grab the first name to start tree bst = BinarySearchTree(names_1[0]) # insert the rest from first file into the tree for name in names_1[1:]: bst.insert(name) for name in names_2: if bst.contains(name): duplicates.append(name) end_time = time.time() print( f"*******MVP TIME******{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n") print(f"runtime: {end_time - start_time} seconds*******MVP TIME******") # --------------BAD TIME----------------- start_time = time.time() # Replace the nested for loops below with your improvements bad_duplicates = [] for name_1 in names_1: for name_2 in names_2: if name_1 == name_2:
# Load both name lists, one name per line. The context managers close the
# files even if reading raises.
with open('names/names_1.txt', 'r') as f:
    names_1 = f.read().split("\n")  # List containing 10000 names

with open('names/names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

duplicates = []

# Index the first list in a binary search tree, seeded with its first name,
# so the second list can be checked against it without nested loops.
bst = BinarySearchTree(names_1[0])

# Insert every remaining name, including the last one. Slicing avoids the
# off-by-one of an index range that stops at len(names_1) - 1.
for name in names_1[1:]:
    bst.insert(name)

for name in names_2:
    if bst.contains(name):
        duplicates.append(name)

# start_time is expected to have been set earlier in the script.
end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# Second pass: membership is tested against a set built once, so this loop
# is O(n) overall. Testing `name_1 in names_2` against the list was still
# O(n^2), because list membership is a linear scan.
# NOTE(review): this pass runs after the results were printed and appends to
# the same `duplicates` list again -- presumably leftover from the exercise;
# confirm whether it can be removed.
names_2_lookup = set(names_2)
for name_1 in names_1:
    if name_1 in names_2_lookup:
        duplicates.append(name_1)