class BinarySearchTreeTests(unittest.TestCase):
    """Unit tests for ``BSTNode``: insert, contains, get_max and for_each."""

    def setUp(self):
        # Every test starts from a single-node tree whose root value is 5.
        self.bst = BSTNode(5)

    def test_insert(self):
        """Smaller values go left, larger values go right."""
        for value in (2, 3, 7, 6):
            self.bst.insert(value)
        self.assertEqual(self.bst.left.right.value, 3)
        self.assertEqual(self.bst.right.left.value, 6)

    def test_handle_dupe_insert(self):
        """A value equal to the node's value is expected in the right subtree."""
        self.bst2 = BSTNode(1)
        self.bst2.insert(1)
        self.assertEqual(self.bst2.right.value, 1)

    def test_contains(self):
        """``contains`` is true for inserted values and false otherwise."""
        for value in (2, 3, 7):
            self.bst.insert(value)
        self.assertTrue(self.bst.contains(7))
        self.assertFalse(self.bst.contains(8))

    def test_get_max(self):
        """``get_max`` tracks the largest value as the tree grows."""
        self.assertEqual(self.bst.get_max(), 5)
        self.bst.insert(30)
        self.assertEqual(self.bst.get_max(), 30)
        self.bst.insert(300)
        self.bst.insert(3)
        self.assertEqual(self.bst.get_max(), 300)

    def test_for_each(self):
        """``for_each`` must hand every stored value to the callback."""
        arr = []
        values = [random.randint(1, 101) for _ in range(5)]
        for value in values:
            self.bst.insert(value)

        # The bound method is the callback; no wrapper lambda is needed.
        self.bst.for_each(arr.append)

        # assertIn reports the missing value and the list on failure,
        # unlike assertTrue(x in arr).
        self.assertIn(5, arr)
        for value in values:
            self.assertIn(value, arr)
duplicates = []  # Return the list of duplicates in this data structure

# Original approach, O(n^2), measured at 5.24 seconds:
#     for name_1 in names_1:
#         for name_2 in names_2:
#             if name_1 == name_2:
#                 duplicates.append(name_1)
#
# BST approach, O(n log n)?, measured at 0.11 seconds:
# index every entry of names_2 in a tree, then probe it with names_1.
# NOTE(review): the root is seeded with "" rather than a real name, so ""
# is always "contained" -- confirm that cannot yield a spurious match.
bst = BSTNode("")
for name_2 in names_2:
    bst.insert(name_2)

duplicates.extend(name_1 for name_1 in names_1 if bst.contains(name_1))

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this
# problem. What's the best time you can accomplish? There are no restrictions
# on techniques or data structures, but you may not import any additional
# libraries that you did not write yourself.
# Read both name lists; `with` closes each file even if the read fails.
with open('names_1.txt', 'r') as f:
    names_1 = f.read().split("\n")  # List containing 10000 names

with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

# Index names_1 in a BST. The root is seeded with an actual entry of names_1
# (str.split with a separator always returns at least one element) instead
# of a hard-coded name, so the tree never contains a value that is absent
# from the file and cannot report a false duplicate.
new_data = BSTNode(names_1[0])
for i in names_1[1:]:
    new_data.insert(i)

# Return the list of duplicates in this data structure.
# Replaces the O(n^2) nested loops: n inserts plus n lookups.
# Presumably each is O(log n) on average for a reasonably balanced tree,
# giving O(n log n) overall -- depends on BSTNode, which is not shown here.
duplicates = [i for i in names_2 if new_data.contains(i)]

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this
# problem. What's the best time you can accomplish? There are no restrictions
# on techniques or data structures, but you may not import any additional
# libraries that you did not write yourself.
class BinarySearchTreeTests(unittest.TestCase):
    """Unit tests for ``BSTNode``: insertion, lookup, min/max and traversals."""

    def setUp(self):
        # Every test starts from a single-node tree whose root value is 5.
        self.bst = BSTNode(5)

    def test_insert(self):
        """Smaller values go left, larger values go right."""
        for value in (2, 3, 7, 6):
            self.bst.insert(value)
        self.assertEqual(self.bst.left.right.value, 3)
        self.assertEqual(self.bst.right.left.value, 6)

    def test_handle_dupe_insert(self):
        """A value equal to the node's value is expected in the right subtree."""
        self.bst2 = BSTNode(1)
        self.bst2.insert(1)
        self.assertEqual(self.bst2.right.value, 1)

    def test_contains(self):
        """``contains`` is true for inserted values and false otherwise."""
        for value in (2, 3, 7):
            self.bst.insert(value)
        self.assertTrue(self.bst.contains(7))
        self.assertFalse(self.bst.contains(8))

    def test_get_max(self):
        """``get_max`` tracks the largest value as the tree grows."""
        self.assertEqual(self.bst.get_max(), 5)
        self.bst.insert(30)
        self.assertEqual(self.bst.get_max(), 30)
        self.bst.insert(300)
        self.bst.insert(3)
        self.assertEqual(self.bst.get_max(), 300)

    def test_get_min(self):
        """``get_min`` tracks the smallest value as the tree grows."""
        self.assertEqual(self.bst.get_min(), 5)
        self.bst.insert(30)
        self.assertEqual(self.bst.get_min(), 5)
        self.bst.insert(300)
        self.bst.insert(3)
        self.assertEqual(self.bst.get_min(), 3)

    def test_for_each(self):
        """``for_each`` must hand every stored value to the callback."""
        arr = []
        values = [random.randint(1, 101) for _ in range(5)]
        for value in values:
            self.bst.insert(value)

        # The bound method is the callback; no wrapper lambda is needed.
        self.bst.for_each(arr.append)

        # assertIn reports the missing value and the list on failure.
        self.assertIn(5, arr)
        for value in values:
            self.assertIn(value, arr)

    def test_print_traversals(self):
        """Check the text each traversal writes to stdout."""
        # WARNING: Tests are for Print()
        # Debug calls to Print() in functions will cause failure
        stdout_ = sys.stdout  # Keep previous value
        try:
            sys.stdout = io.StringIO()
            self.bst = BSTNode(1)
            for value in (8, 5, 7, 6, 3, 4, 2):
                self.bst.insert(value)

            self.bst.in_order_print(self.bst)
            output = sys.stdout.getvalue()
            self.assertEqual(output, "1\n2\n3\n4\n5\n6\n7\n8\n")

            # Each traversal gets a fresh buffer so outputs do not mix.
            sys.stdout = io.StringIO()
            self.bst.bft_print(self.bst)
            output = sys.stdout.getvalue()
            # Two orders are accepted for bft_print and dft_print.
            self.assertIn(
                output,
                ("1\n8\n5\n3\n7\n2\n4\n6\n", "1\n8\n5\n7\n3\n6\n4\n2\n"),
            )

            sys.stdout = io.StringIO()
            self.bst.dft_print(self.bst)
            output = sys.stdout.getvalue()
            self.assertIn(
                output,
                ("1\n8\n5\n7\n6\n3\n4\n2\n", "1\n8\n5\n3\n2\n4\n7\n6\n"),
            )

            sys.stdout = io.StringIO()
            self.bst.pre_order_dft(self.bst)
            output = sys.stdout.getvalue()
            self.assertEqual(output, "1\n8\n5\n3\n2\n4\n7\n6\n")

            sys.stdout = io.StringIO()
            self.bst.post_order_dft(self.bst)
            output = sys.stdout.getvalue()
            self.assertEqual(output, "2\n4\n3\n6\n7\n5\n8\n1\n")
        finally:
            # Restore stdout even when an assertion above fails; otherwise
            # the output of every later test would be swallowed.
            sys.stdout = stdout_
start_time = time.time()

# Read both name lists; `with` closes each file even if the read fails.
with open('names_1.txt', 'r') as f:
    names_1 = f.read().split("\n")  # List containing 10000 names

with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

# Index names_1 in a BST *instance*. The previous code discarded the node it
# created and called insert/contains on the class itself, which cannot work.
# str.split with a separator always returns at least one element, so
# names_1[0] is safe to use as the root value.
tree = BSTNode(names_1[0])
for name_1 in names_1[1:]:
    tree.insert(name_1)

# Return the list of duplicates in this data structure.
# Record the name that was actually looked up (name_2); the previous code
# appended the stale loop variable name_1 instead.
duplicates = [name_2 for name_2 in names_2 if tree.contains(name_2)]

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this
# problem. What's the best time you can accomplish? There are no restrictions
# on techniques or data structures, but you may not import any additional
# libraries that you did not write yourself.
# Baseline: compare every name in names_1 with every name in names_2, O(n^2).
for name_1 in names_1:
    for name_2 in names_2:
        if name_1 == name_2:
            duplicates.append(name_1)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# BST version, timed separately for comparison with the baseline above.
start2 = time.time()

# The root already holds names_1[0], so only the remaining names are inserted.
search = BSTNode(names_1[0])
for n1 in names_1[1:]:
    search.insert(n1)

duplicates2 = [n2 for n2 in names_2 if search.contains(n2)]

end2 = time.time()
# Report the BST result (duplicates2); the previous code printed the
# baseline list a second time.
print(f"{len(duplicates2)} duplicates:\n\n{', '.join(duplicates2)}\n\n")
print(f"runtime: {end2 - start2} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this
# problem. What's the best time you can accomplish? There are no restrictions
# on techniques or data structures, but you may not import any additional
# libraries that you did not write yourself.
# Index names_2 in a tree.
# NOTE(review): the root is seeded with "" rather than a real name, so ""
# is always "contained" -- confirm that cannot yield a spurious match.
names_2_bst = BSTNode("")
for name in names_2:
    names_2_bst.insert(name)

# Return the list of duplicates in this data structure.
# This replaces the original nested loops:
#     for name_1 in names_1:
#         for name_2 in names_2:
#             if name_1 == name_2:
#                 duplicates.append(name_1)
duplicates = [n1 for n1 in names_1 if names_2_bst.contains(n1)]

end_time = time.time()
print("BST Version:")
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this
# problem. What's the best time you can accomplish? There are no restrictions
# on techniques or data structures, but you may not import any additional
# libraries that you did not write yourself.
start_time2 = time.time()
dupes = []
names2_set = set(names_2)
# Read both name lists; `with` closes each file even if the read fails.
with open('names_1.txt', 'r') as f:
    names_1 = f.read().split("\n")  # List containing 10000 names

with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

# CREATE TREE WITH names_1 LIST
start_time = time.time()
# The root already holds names_1[0], so only the remaining names are inserted.
tree = BSTNode(names_1[0])
for name in names_1[1:]:
    tree.insert(name)
duplicates = [name for name in names_2 if tree.contains(name)]
end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# CREATE TREE WITH names_2 LIST (just in case order is better)
# ... it's not. it's the same
start_time = time.time()
tree = BSTNode(names_2[0])
for name in names_2[1:]:
    tree.insert(name)
duplicates = [name for name in names_1 if tree.contains(name)]
names_1 = f.read().split("\n")  # List containing 10000 names
f.close()

f = open('names_2.txt', 'r')
names_2 = f.read().split("\n")  # List containing 10000 names
f.close()

# Return the list of duplicates in this data structure.
# This replaces the original nested loops:
#     for name_1 in names_1:
#         for name_2 in names_2:
#             if name_1 == name_2:
#                 duplicates.append(name_1)
duplicates = []

# Root the tree at the first name, then add the rest of names_1.
first_name, remaining_names = names_1[0], names_1[1:]
names_tree = BSTNode(first_name)
for name in remaining_names:
    names_tree.insert(name)

# Every entry of names_2 that the tree contains is a duplicate.
duplicates.extend(name for name in names_2 if names_tree.contains(name))

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this
# problem. What's the best time you can accomplish? There are no restrictions
# on techniques or data structures, but you may not import any additional
# libraries that you did not write yourself.
# ----------------------------------------------------------------------------
# --------------------T-H-I-S--I-S--F-A-S-T-----------------------------------
# duplicates = numpy.intersect1d(names_1, names_2)
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------

# Instantiate a tree/node with the first name in the `names_1` list
b_s_tree = BSTNode(names_1[0])

# Put the remaining names in the tree; the root already holds names_1[0],
# so inserting it again would only add a redundant node.
for n in names_1[1:]:
    b_s_tree.insert(n)

# Every entry of names_2 that the tree contains is a duplicate
duplicates = [n for n in names_2 if b_s_tree.contains(n)]

# ----------------------------------------------------------------------------

print('\nTime complexity: O(n log n)\n')
print('How many are common:', len(duplicates), '\n')
print('What\'s_common', duplicates)

# NOTE(review): the clock is stopped after the prints above, so the reported
# runtime includes the time spent printing -- confirm that is intended.
end_time = time.time()
# print (f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"\nRuntime: {end_time - start_time:.03} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this
# problem. What's the best time you can accomplish? There are no restrictions
# on techniques or data structures, but you may not import any additional
# libraries that you did not write yourself.
start_time = time.time()

# Read both name lists; `with` closes each file even if the read fails.
with open('names_1.txt', 'r') as f:
    names_1 = f.read().split("\n")  # List containing 10000 names

with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

# Index names_1 in a BST. The root already holds names_1[0], so only the
# remaining names are inserted.
tree = BSTNode(names_1[0])
for name_1 in names_1[1:]:
    tree.insert(name_1)

# Return the list of duplicates in this data structure:
# every entry of names_2 that the tree contains.
duplicates = [name_2 for name_2 in names_2 if tree.contains(name_2)]

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this
# problem. What's the best time you can accomplish? There are no restrictions
# on techniques or data structures, but you may not import any additional
# libraries that you did not write yourself.
# `with` closes the file even if the read fails.
with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

# NOTE(review): the root is seeded with "" rather than a real name, so ""
# is always "contained" -- confirm that cannot yield a spurious match.
bst_node = BSTNode("")

# Use the 'insert' method to add every name from the names_1 list.
for name in names_1:
    bst_node.insert(name)

# Return the list of duplicates in this data structure:
# every name from the names_2 list that the tree already contains.
duplicates = [name for name in names_2 if bst_node.contains(name)]

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this
# problem. What's the best time you can accomplish? There are no restrictions
# on techniques or data structures, but you may not import any additional
# libraries that you did not write yourself.
f.close()

f = open('names_2.txt', 'r')
names_2 = f.read().split("\n")  # List containing 10000 names
f.close()

# Index names_2 in a BST. The root is seeded with a real entry of names_2
# (str.split with a separator always returns at least one element) instead
# of the "" placeholder, so the tree never contains a value that is absent
# from the file and cannot report "" as a false duplicate.
bst = BSTNode(names_2[0])
for name in names_2[1:]:
    bst.insert(name)

# Return the list of duplicates in this data structure.
# This replaces the original nested loops:
#     for name_1 in names_1:
#         for name_2 in names_2:
#             if name_1 == name_2:
#                 duplicates.append(name_1)
duplicates = [
    other_name for other_name in names_1 if bst.contains(other_name)
]

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this
# problem. What's the best time you can accomplish? There are no restrictions
# on techniques or data structures, but you may not import any additional
# libraries that you did not write yourself.
# Replace the nested for loops below with your improvements
# for name_1 in names_1:
#     for name_2 in names_2:
#         if name_1 == name_2:
#             duplicates.append(name_1)

# My implementation: store one of the lists in a BST and look up every
# entry of the other list in it.
# The root already holds names_1[0], so only the remaining names are inserted.
bst = BSTNode(names_1[0])
for name in names_1[1:]:
    bst.insert(name)

for name in names_2:
    if bst.contains(name):
        duplicates.append(name)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# Runtime complexity of the original answer:
# quadratic time, O(n^2), because for every value visited in names_1 we have
# to visit every single value of names_2. That is every input of names_1
# multiplied by every value of names_2, hence O(n^2). As the input grows,
# the number of comparisons grows quadratically (not exponentially).

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
f.close()

f = open('./names/names_2.txt', 'r')
names_2 = f.read().split("\n")  # List containing 10000 names
f.close()

duplicates = []  # Return the list of duplicates in this data structure

# Original nested loops, runtime: 12.790003776550293 seconds
# for name_1 in names_1:
#     for name_2 in names_2:
#         if name_1 == name_2:
#             duplicates.append(name_1)

# BST version, runtime: 0.16200661659240723 seconds
# The root already holds names_1[0], so only the remaining names are inserted.
BST = BSTNode(names_1[0])
for name in names_1[1:]:
    BST.insert(name)

for name in names_2:
    if BST.contains(name):
        duplicates.append(name)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this
# problem. What's the best time you can accomplish? There are no restrictions
# on techniques or data structures, but you may not import any additional
# libraries that you did not write yourself.
# Read both name lists; `with` closes each file even if the read fails.
with open('names_1.txt', 'r') as f:
    names_1 = f.read().split("\n")  # List containing 10000 names

with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

# Index names_1 in a BST. The root already holds names_1[0], so only the
# remaining names are inserted.
bst = BSTNode(names_1[0])
for name_1 in names_1[1:]:
    bst.insert(name_1)

# Return the list of duplicates in this data structure:
# every entry of names_2 that the tree contains.
duplicates = [name_2 for name_2 in names_2 if bst.contains(name_2)]

# Original nested loops, replaced by the BST lookup above:
# for name_1 in names_1:
#     for name_2 in names_2:
#         if name_1 == name_2:
#             duplicates.append(name_1)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this
# problem. What's the best time you can accomplish? There are no restrictions
# on techniques or data structures, but you may not import any additional
# libraries that you did not write yourself.
names_1 = f.read().split("\n")  # List containing 10000 names
f.close()

f = open("names_2.txt", "r")
names_2 = f.read().split("\n")  # List containing 10000 names
f.close()

duplicates = []  # Return the list of duplicates in this data structure

# Index names_1 in a BST. The root already holds names_1[0], so only the
# remaining names are inserted. (The previous code built an unused copy of
# names_1[1:] and then inserted the whole list, root value included.)
tree = BSTNode(names_1[0])
for name in names_1[1:]:
    tree.insert(name)

for name in names_2:
    if tree.contains(name):
        duplicates.append(name)

# Original nested loops, replaced by the BST lookup above:
# for name_1 in names_1:
#     for name_2 in names_2:
#         if name_1 == name_2:
#             duplicates.append(name_1)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish? There are no restrictions on techniques or data