Example #1
0
def bst_ordered(n, target):
    """Build a tree from the ascending keys 0..n-1, then look up ``target``.

    The cost notes below assume ``BSTNode`` (defined elsewhere) is a plain,
    non-rebalancing binary search tree: ascending keys then chain down one
    side, so each insert walks the whole chain.

    The lookup result is discarded and nothing is returned.
    """
    root = BSTNode(0)
    # n - 1 inserts, each up to O(n) on a degenerate chain -> O(n^2) overall.
    for key in range(1, n):
        root.insert(key)

    # A single lookup on the same chain is O(n).
    root.contains(target)
Example #2
0
def bst_random(random_nums, target):
    """Build a tree from ``random_nums`` in the given order, then look up ``target``.

    The first element becomes the root and the remaining ones are inserted
    one by one. The cost notes assume ``BSTNode`` (defined elsewhere) is an
    ordinary binary search tree fed with randomly ordered values.

    The lookup result is discarded and nothing is returned.
    """
    first_value = random_nums[0]
    root = BSTNode(first_value)  # O(1)

    # n - 1 inserts at roughly O(log n) each -> about O(n log n) overall.
    later_values = random_nums[1:]
    for value in later_values:
        root.insert(value)

    # Each comparison discards one subtree, so a lookup is about O(log n).
    root.contains(target)
class BinarySearchTreeTests(unittest.TestCase):
    # Exercises BSTNode.insert, BSTNode.contains and BSTNode.get_max.

    def setUp(self):
        # Every test starts from a fresh one-node tree rooted at 5.
        self.bst = BSTNode(5)

    def test_insert(self):
        # 2 and 3 end up under the root's left child, 7 and 6 under its right.
        for value in (2, 3, 7, 6):
            self.bst.insert(value)
        left_child = self.bst.left
        right_child = self.bst.right
        self.assertEqual(left_child.right.value, 3)
        self.assertEqual(right_child.left.value, 6)

    def test_handle_dupe_insert(self):
        # A value equal to the node's own value is expected on the right.
        self.bst2 = BSTNode(1)
        self.bst2.insert(1)
        duplicate_node = self.bst2.right
        self.assertEqual(duplicate_node.value, 1)

    def test_contains(self):
        for value in (2, 3, 7):
            self.bst.insert(value)
        found = self.bst.contains(7)
        missing = self.bst.contains(8)
        self.assertTrue(found)
        self.assertFalse(missing)

    def test_get_max(self):
        # The maximum is re-checked as larger and smaller values arrive.
        self.assertEqual(self.bst.get_max(), 5)
        self.bst.insert(30)
        self.assertEqual(self.bst.get_max(), 30)
        for value in (300, 3):
            self.bst.insert(value)
        self.assertEqual(self.bst.get_max(), 300)

    """
Example #4
0
class BinarySearchTreeTests(unittest.TestCase):
    # Exercises BSTNode.insert, contains, get_max and for_each.

    def setUp(self):
        # Every test starts from a fresh one-node tree rooted at 5.
        self.bst = BSTNode(5)

    def test_insert(self):
        # 2 and 3 end up under the root's left child, 7 and 6 under its right.
        for value in (2, 3, 7, 6):
            self.bst.insert(value)
        left_child = self.bst.left
        right_child = self.bst.right
        self.assertEqual(left_child.right.value, 3)
        self.assertEqual(right_child.left.value, 6)

    def test_handle_dupe_insert(self):
        # A value equal to the node's own value is expected on the right.
        self.bst2 = BSTNode(1)
        self.bst2.insert(1)
        duplicate_node = self.bst2.right
        self.assertEqual(duplicate_node.value, 1)

    def test_contains(self):
        for value in (2, 3, 7):
            self.bst.insert(value)
        found = self.bst.contains(7)
        missing = self.bst.contains(8)
        self.assertTrue(found)
        self.assertFalse(missing)

    def test_get_max(self):
        # The maximum is re-checked as larger and smaller values arrive.
        self.assertEqual(self.bst.get_max(), 5)
        self.bst.insert(30)
        self.assertEqual(self.bst.get_max(), 30)
        for value in (300, 3):
            self.bst.insert(value)
        self.assertEqual(self.bst.get_max(), 300)

    def test_for_each(self):
        # for_each must hand every stored value to the callback.
        visited = []

        # Five random values are drawn first, then inserted in that order.
        samples = [random.randint(1, 101) for _ in range(5)]
        for sample in samples:
            self.bst.insert(sample)

        self.bst.for_each(visited.append)

        # The root value (5) and every sample must have been visited.
        for expected in [5] + samples:
            self.assertIn(expected, visited)
Example #5
0
def binary_search(lst, lst2):
    """Append to ``duplicates`` every entry of ``lst2`` that also occurs in ``lst``.

    ``lst`` is loaded into a binary search tree (``BST``) and every entry of
    ``lst2`` is then looked up in it. Matches are appended, in ``lst2`` order,
    to the module-level ``duplicates`` list; nothing is returned.

    The previous version ignored both parameters and always read the
    module-level ``names_1`` / ``names_2`` lists, so calling it with any other
    pair of lists silently compared the wrong data.
    """
    if not lst:
        # Nothing to build a tree from, hence nothing can match.
        return

    # The first entry becomes the root; only the remaining ones are inserted.
    bst = BST(lst[0])
    for entry in lst[1:]:
        bst.insert(entry)

    for candidate in lst2:
        if bst.contains(candidate):
            duplicates.append(candidate)
def binary_search_tree_approach():
    """Return the entries of ``names_1`` that also appear in ``names_2``.

    The module-level ``names_2`` list is loaded into a ``BSTNode`` tree and
    each entry of the module-level ``names_1`` list is looked up in it.
    Matches are returned in ``names_1`` order. An empty ``names_2`` yields an
    empty result instead of an ``IndexError``.
    """
    if not names_2:
        return []

    # The first name becomes the root, so only the remaining names are
    # inserted; re-inserting the root value would add a redundant node.
    bst = BSTNode(names_2[0])
    for name in names_2[1:]:
        bst.insert(name)

    # Search the tree for matching names.
    return [name for name in names_1 if bst.contains(name)]
Example #7
0
class Tree:
    """Thin wrapper that owns an optional root node and forwards calls to it."""

    def __init__(self):
        # No root until the first value is inserted.
        self.root = None

    def insert(self, value):
        """Insert ``value``.

        The first value creates the root ``BSTNode`` and yields ``True``;
        later values are forwarded to the root's ``insert`` and its result is
        returned unchanged.
        """
        if not self.root:
            self.root = BSTNode(value)
            return True
        return self.root.insert(value)

    def find(self, value):
        """Return the root's ``contains(value)``, or ``False`` without a root."""
        return self.root.contains(value) if self.root else False
Example #8
0
#             duplicates.append(name_1)

# Load every name from names_1 into a binary search tree so that membership
# can be tested with the tree's contains() method.
# The root is seeded with the first real name rather than a placeholder such
# as '': a placeholder is stored in the tree like any other value, so a blank
# entry in names_2 would be reported as a duplicate even if names_1 had none.
BST_names_1 = None
for name_1 in names_1:
    if BST_names_1 is None:
        BST_names_1 = BSTNode(name_1)
    else:
        BST_names_1.insert(name_1)

# Iterate over names_2 and record every name the tree already holds.
if BST_names_1 is not None:
    for name_2 in names_2:
        if BST_names_1.contains(name_2):
            duplicates.append(name_2)

duplicates = sorted(duplicates)

# # stretch using only arrays...not using our imported BSTNode Class.
# # can this reach a similar efficiency? using BST we were around a runtime of 0.105 seconds
# # this runs in about .63 seconds if not sorting the list, and .95 seconds if sorting. Not faster, but no need
# # to create a whole new class to implement.

# unique_names = set(names_2+names_1)

# duplicates = sorted([i for i in names_1 if i in names_1 and i in names_2])

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
Example #9
0
names_2 = f.read().split("\n")  # List containing 10000 names
f.close()

# Names found in both lists, collected in names_2 order.
duplicates = []

# The tree replaces the original pair of nested loops that compared every
# name in names_1 with every name in names_2.
# pop() removes the last entry of names_1 and uses it as the root, so the
# loop below only sees the entries that are not in the tree yet.
root_name = names_1.pop()
bst_names = BSTNode(root_name)
for remaining_name in names_1:
    bst_names.insert(remaining_name)

# Keep each entry of names_2 that the tree contains.
duplicates.extend(
    candidate for candidate in names_2 if bst_names.contains(candidate)
)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print("Binary search tree")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.

t_start = time.time()
hash_duplicates = []
print(f"runtime: {end_time - start_time} seconds\n\n")
""" MY IMPROVEMENT SOULTION """
start_time = time.time()

# Insert all the names of one list into a BST. The root is seeded with the
# first real name rather than a placeholder "": a placeholder is stored in
# the tree like any other value, so a blank entry in names_2 would be
# reported as a duplicate even if names_1 had none.
name_tree = None
for name in names_1:
    if name_tree is None:
        name_tree = BSTNode(name)
    else:
        name_tree.insert(name)

# Look at every name in the second list and keep those the BST contains.
duplicates = []
if name_tree is not None:
    duplicates = [name for name in names_2 if name_tree.contains(name)]

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very
# efficient approach to this problem
# What's the best time you can accomplish?
# There are no restrictions on techniques or data
# structures, but you may not import any additional
# libraries that you did not write yourself.
names_2 = f.read().split("\n")  # List containing 10000 names
f.close()

duplicates = []  # Return the list of duplicates in this data structure

# Author's measurement for the tree-based version: 0.0599980354309082 seconds.
# The first name becomes the root. The iterator is advanced once so the loop
# below starts at the second name; the earlier `next(iter(names_1))` created a
# throw-away iterator and skipped nothing, so the root was inserted twice.
first_list = BSTNode(names_1[0])
remaining_names = iter(names_1)
next(remaining_names)

for name in remaining_names:
    first_list.insert(name)

# Keep every name of names_2 that the tree contains. This replaces the
# original nested loops that compared each name_1 with each name_2.
for name in names_2:
    if first_list.contains(name):
        duplicates.append(name)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
#             duplicates.append(name_1)


# Load the names_1 data into a BST, using the first name as the root node.
tree = BSTNode(names_1[0])

# Insert only the remaining names: the first one is already the root, and
# inserting it again would just add a redundant node.
for name in names_1[1:]:
    tree.insert(name)

# Check every name of the second list against the tree and record matches.
for name in names_2:
    if tree.contains(name):
        duplicates.append(name)

# Author's measurement: 0.17413 seconds. Building and querying cost about
# O(n log n) overall while the tree stays reasonably balanced (that depends
# on BSTNode, which is defined elsewhere).

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.
Example #13
0
start_time = time.time()

# `with` closes each file even if reading it raises.
with open('names_1.txt', 'r') as f:
    names_1 = f.read().split("\n")  # List containing 10000 names

with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

# Load names_1 into a binary search tree: the first name is the root and the
# remaining names are inserted below it. str.split always returns at least
# one element, so names_1[0] exists.
bstNames1 = BSTNode(names_1[0])
for name_1 in names_1[1:]:
    bstNames1.insert(name_1)

# Record each name of names_2 that the tree contains. The previous version
# appended `name_1`, the stale variable left over from the insert loop, so it
# recorded the last name of names_1 once per match instead of the match.
duplicates = [name_2 for name_2 in names_2 if bstNames1.contains(name_2)]

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.
Example #14
0
# duplicates = [name for name in names_1 if name in names_2]
duplicates = []

# Load names_1 into a binary search tree. The root is seeded with the first
# real name instead of a placeholder such as 'testing': a placeholder is
# stored in the tree like any other value, so it would be reported as a
# duplicate if it ever appeared in names_2.
bst = None
for name in names_1:
    if bst is None:
        bst = BSTNode(name)
    else:
        bst.insert(name)

# Keep every entry of names_2 that is already in the tree.
if bst is not None:
    duplicates = [name for name in names_2 if bst.contains(name)]

# For reference: the original solution compared every name in names_1 with
# every name in names_2 using nested loops. Costs of nested loops multiply,
# so that version is O(n) * O(n) = O(n^2). The author measured 7.6484 seconds
# for it against 0.172 seconds for the tree-based version above.

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")
duplicates = []  # Return the list of duplicates in this data structure

# Load names_2 into a binary search tree (this replaces the original nested
# loops). The root is seeded with the first real name instead of the
# placeholder "NOT TARGET": a placeholder is stored in the tree like any
# other value and would match if it ever appeared in names_1.
bst = None
for i in names_2:
    if bst is None:
        bst = BSTNode(i)
    else:
        bst.insert(i)

# Keep every entry of names_1 that the tree contains.
if bst is not None:
    for i in names_1:
        if bst.contains(i):
            duplicates.append(i)

print(f"---\n---")

# while not queue.size == len(names_1) - 1:
#     queue.enqueue(names_1[queue.size])

# current_node = queue.storage.head

# while queue.size:
#     current_node = queue.dequeue()

#     if current_node in names_2:
#         duplicates.append(current_node)
Example #16
0
# `with` closes the file even if reading it raises.
with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

duplicates = []  # Return the list of duplicates in this data structure

# Load names_1 into a binary search tree: the first name is the root and the
# remaining names are inserted below it. This replaces the original nested
# loops that compared every name_1 with every name_2.
bst1 = BSTNode(names_1[0])
for name_1 in names_1[1:]:
    bst1.insert(name_1)

# Record each name of names_2 that the tree contains.
for name_2 in names_2:
    if bst1.contains(name_2):
        duplicates.append(name_2)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.
            duplicates.append(name_1)

The runtime was:
10.852504014968872 seconds
"""

# Root the tree at the first entry of names_1 ...
bst_1 = BSTNode(names_1[0])

# ... then add every later entry below it.
for later_name in names_1[1:]:
    bst_1.insert(later_name)

# Walk names_2 from start to end and store each name the tree holds.
duplicates.extend(
    candidate for candidate in names_2 if bst_1.contains(candidate)
)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.
Example #18
0
# `with` closes the file even if reading it raises.
with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

# Add the second list to a BST: the first name is the root and the remaining
# names are inserted below it. The root is skipped by position. The previous
# version skipped it by the hard-coded name 'Nathen Bishop', which only works
# for one particular data file and drops that name from the tree whenever it
# is not the first entry.
bst = BSTNode(names_2[0])
for name_2 in names_2[1:]:
    bst.insert(name_2)

# Record each name of names_1 that the tree contains.
# Author's measurement for the original nested loops: ~4 sec.
duplicates = [name_1 for name_1 in names_1 if bst.contains(name_1)]

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.
Example #19
0
f.close()

# `with` closes the file even if reading it raises.
with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

duplicates = []  # Return the list of duplicates in this data structure

# Load names_1 into a binary search tree (this replaces the original nested
# loops). The root is seeded with the first real name instead of the
# placeholder 'names1': a placeholder is stored in the tree like any other
# value and would be reported as a duplicate if it appeared in names_2.
name1bst = None
for name1 in names_1:
    if name1bst is None:
        name1bst = BSTNode(name1)
    else:
        name1bst.insert(name1)

# Record each name of names_2 that the tree contains.
if name1bst is not None:
    for name2 in names_2:
        if name1bst.contains(name2):
            duplicates.append(name2)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.
Example #20
0
# `with` closes each file even if reading it raises.
with open('names_1.txt', 'r') as f:
    names_1 = f.read().split("\n")  # List containing 10000 names

with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

duplicates = []  # Return the list of duplicates in this data structure

# Load names_1 into a binary search tree. The first name is the root, so only
# the remaining names are inserted; the previous loop re-inserted the root
# value and added a redundant node. str.split always returns at least one
# element, so names_1[0] exists.
faster_names = BSTNode(names_1[0])
for name_1 in names_1[1:]:
    faster_names.insert(name_1)

# Record each name of names_2 that the tree contains.
for name_2 in names_2:
    if faster_names.contains(name_2):
        duplicates.append(name_2)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.

# first runtime was 14seconds optomized runtime is 0.329120397567749 seconds
# `with` closes each file even if reading it raises.
with open('names_1.txt', 'r') as f:
    names_1 = f.read().split("\n")  # List containing 10000 names

with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

# Load names_1 into a binary search tree (this replaces the original nested
# loops). The root is the first name read from the file rather than the
# hard-coded "Andrew Sitzes": a hard-coded root is stored in the tree whether
# or not names_1 contains it, so it could be reported as a false duplicate.
# str.split always returns at least one element, so names_1[0] exists.
tree = BSTNode(names_1[0])
for names in names_1[1:]:
    tree.insert(names)

# Record each name of names_2 that the tree contains.
duplicates = [checkNames for checkNames in names_2 if tree.contains(checkNames)]

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# `with` closes the file even if reading it raises.
with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

duplicates = []  # Return the list of duplicates in this data structure

from binary_search_tree import BSTNode

# Load names_1 into a binary search tree. The root is seeded with the first
# real name instead of the placeholder ' ': a placeholder is stored in the
# tree like any other value and would be reported as a duplicate if it
# appeared in names_2.
bst_1 = None
for i in names_1:
    if bst_1 is None:
        bst_1 = BSTNode(i)
    else:
        bst_1.insert(i)

# Debug probe kept from the original: reports whether one known name was loaded.
print(bst_1 is not None and bst_1.contains('Regina Molina'))

# Record each name of names_2 that the tree contains.
if bst_1 is not None:
    for i in names_2:
        if bst_1.contains(i):
            duplicates.append(i)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# runtime was 0.57 with bst, compared with approx. 20 seconds for the original
f.close()

# `with` closes the file even if reading it raises.
with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

duplicates = []  # Return the list of duplicates in this data structure

# Load names_1 into a binary search tree (this replaces the original nested
# loops). The root is seeded with the first real name instead of the
# placeholder 'names': a placeholder is stored in the tree like any other
# value and would be reported as a duplicate if it appeared in names_2.
nodes = None
for names in names_1:
    if nodes is None:
        nodes = BSTNode(names)
    else:
        nodes.insert(names)

# Record each name of names_2 that the tree contains.
if nodes is not None:
    for name in names_2:
        if nodes.contains(name):
            duplicates.append(name)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.
# Replace the nested for loops below with your improvements
# for name_1 in names_1:
#     for name_2 in names_2:
#         if name_1 == name_2:
#             duplicates.append(name_1)

# `i` flips from 0 to 1 once the root has been created from the first name;
# every later name is inserted into that tree.
i = 0
for name in names_1:
    if i != 0:
        bt.insert(name)
        continue
    bt = BSTNode(name)
    i = 1

# Record each name of names_2 that the tree contains.
duplicates.extend(candidate for candidate in names_2 if bt.contains(candidate))

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.

start_time = time.time()

duplicates = []
i = 0
names_2 = set(names_2)

# Runtime for this appears to be O(n log n): it grows with the size of the
# lists, but far more slowly than the O(n^2) growth of the nested-loop
# version (assuming the tree stays reasonably balanced).

# Names present in both lists end up here.
duplicates = []

# The first value of names_1 becomes the root node of the tree ...
root_name = names_1[0]
tree = BSTNode(root_name)
# ... and the remaining values are inserted below it.
other_names = names_1[1:]
for other_name in other_names:
    tree.insert(other_name)

# Check the second list against the tree and keep whatever it contains.
# (An earlier version tested `name_1 in names_2` for each name instead.)
duplicates.extend(candidate for candidate in names_2 if tree.contains(candidate))

end_time = time.time()

print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
Example #26
0
# print (f"\n{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
# print (f"runtime of nested loop (O(n^2)): {end_time - start_time} seconds")

################################################################
# Using BST data structures to reduce time complexity

duplicates1 = []  # Return the list of duplicates in this data structure

start_time1 = time.time()

# Load names_1 into a binary search tree. The root is seeded with the first
# real name instead of the placeholder 'names': a placeholder is stored in
# the tree like any other value and would be reported as a duplicate if it
# appeared in names_2.
binary_tree = None
for name_1 in names_1:
    if binary_tree is None:
        binary_tree = BSTNode(name_1)
    else:
        binary_tree.insert(name_1)

# Record each name of names_2 that the tree contains.
if binary_tree is not None:
    for name_2 in names_2:
        if binary_tree.contains(name_2):
            duplicates1.append(name_2)

end_time1 = time.time()
print(f"\n{len(duplicates1)} duplicates:\n\n{', '.join(duplicates1)}\n\n")
# NOTE(review): the label below says "Linked list" although the code above
# uses a binary search tree; the text is left unchanged in case the output
# is compared elsewhere.
print(
    f"runtime of for loop with Linked list: {end_time1 - start_time1} seconds")

# # ---------- Stretch Goal -----------
# # Python has built-in tools that allow for a very efficient approach to this problem
# # What's the best time you can accomplish?  There are no restrictions on techniques or data
# # structures, but you may not import any additional libraries that you did not write yourself.

####################################################################################

# # O(n)
class BinarySearchTreeTests(unittest.TestCase):
    # Exercises BSTNode: insert, contains, get_max, for_each and the
    # printing traversals.

    def setUp(self):
        # Every test starts from a fresh one-node tree rooted at 5.
        self.bst = BSTNode(5)

    def test_insert(self):
        # 2 and 3 end up under the root's left child, 7 and 6 under its right.
        self.bst.insert(2)
        self.bst.insert(3)
        self.bst.insert(7)
        self.bst.insert(6)
        self.assertEqual(self.bst.left.right.value, 3)
        self.assertEqual(self.bst.right.left.value, 6)

    def test_handle_dupe_insert(self):
        # A value equal to the node's own value is expected on the right.
        self.bst2 = BSTNode(1)
        self.bst2.insert(1)
        self.assertEqual(self.bst2.right.value, 1)

    def test_contains(self):
        self.bst.insert(2)
        self.bst.insert(3)
        self.bst.insert(7)
        self.assertTrue(self.bst.contains(7))
        self.assertFalse(self.bst.contains(8))

    def test_get_max(self):
        # The maximum is re-checked as larger and smaller values arrive.
        self.assertEqual(self.bst.get_max(), 5)
        self.bst.insert(30)
        self.assertEqual(self.bst.get_max(), 30)
        self.bst.insert(300)
        self.bst.insert(3)
        self.assertEqual(self.bst.get_max(), 300)

    def test_for_each(self):
        # for_each must hand every stored value to the callback.
        arr = []

        v1 = random.randint(1, 101)
        v2 = random.randint(1, 101)
        v3 = random.randint(1, 101)
        v4 = random.randint(1, 101)
        v5 = random.randint(1, 101)

        for value in (v1, v2, v3, v4, v5):
            self.bst.insert(value)

        self.bst.for_each(arr.append)

        # assertIn reports the missing value and the list on failure.
        for expected in (5, v1, v2, v3, v4, v5):
            self.assertIn(expected, arr)

    def test_print_traversals(self):
        # WARNING:  Tests are for Print()
        # Debug calls to Print() in functions will cause failure

        stdout_ = sys.stdout  # Keep previous value
        try:
            sys.stdout = io.StringIO()

            self.bst = BSTNode(1)
            self.bst.insert(8)
            self.bst.insert(5)
            self.bst.insert(7)
            self.bst.insert(6)
            self.bst.insert(3)
            self.bst.insert(4)
            self.bst.insert(2)

            self.bst.in_order_print(self.bst)

            output = sys.stdout.getvalue()
            self.assertEqual(output, "1\n2\n3\n4\n5\n6\n7\n8\n")

            # Each traversal below prints into a fresh buffer. Two outputs
            # are accepted where the order of visiting children may differ.
            sys.stdout = io.StringIO()
            self.bst.bft_print(self.bst)
            output = sys.stdout.getvalue()
            self.assertIn(output, ("1\n8\n5\n3\n7\n2\n4\n6\n",
                                   "1\n8\n5\n7\n3\n6\n4\n2\n"))

            sys.stdout = io.StringIO()
            self.bst.dft_print(self.bst)
            output = sys.stdout.getvalue()
            self.assertIn(output, ("1\n8\n5\n7\n6\n3\n4\n2\n",
                                   "1\n8\n5\n3\n2\n4\n7\n6\n"))

            sys.stdout = io.StringIO()
            self.bst.pre_order_dft(self.bst)
            output = sys.stdout.getvalue()
            self.assertEqual(output, "1\n8\n5\n3\n2\n4\n7\n6\n")

            sys.stdout = io.StringIO()
            self.bst.post_order_dft(self.bst)
            output = sys.stdout.getvalue()
            self.assertEqual(output, "2\n4\n3\n6\n7\n5\n8\n1\n")
        finally:
            # Restore stdout even when an assertion above fails; otherwise
            # the output of every later test is swallowed by the StringIO.
            sys.stdout = stdout_
start_time = time.time()

# `with` closes each file even if reading it raises.
with open('names_1.txt', 'r') as f:
    names_1 = f.read().split("\n")  # List containing 10000 names

with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

# Load names_1 into a binary search tree. The root is the first name read
# from the file rather than a placeholder '': a placeholder is stored in the
# tree like any other value, so a blank entry in names_2 (e.g. from a
# trailing newline) would be reported as a duplicate even if names_1 had
# none. str.split always returns at least one element, so names_1[0] exists.
tree = BSTNode(names_1[0])
for name1 in names_1[1:]:
    tree.insert(name1)

# Record each name of names_2 that the tree contains.
duplicates = [name2 for name2 in names_2 if tree.contains(name2)]

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.
Example #29
0
if a > b:
    print("true")
else:
    print("false")

We should be able to use the first letter of the name to determine
where each name should be in a binary search tree.
'''

duplicates = []  # Return the list of duplicates in this data structure

# Load names_1 into a binary search tree. The root is seeded with the first
# real name instead of the placeholder "root": a placeholder is stored in the
# tree like any other value and would be reported as a duplicate if it
# appeared in names_2.
binary_search_tree = None
for name_1 in names_1:
    if binary_search_tree is None:
        binary_search_tree = BSTNode(name_1)
    else:
        binary_search_tree.insert(name_1)

# Record each name of names_2 that the tree contains.
if binary_search_tree is not None:
    for name_2 in names_2:
        if binary_search_tree.contains(name_2):
            duplicates.append(name_2)

# Improved runtime (author's measurement) = 0.25955724716186523 seconds
# Improved runtime complexity: two loops of n steps, each step doing a tree
# insert or lookup, so about O(n log n) while the tree stays reasonably
# balanced -- not O(2n), because the per-step tree work is not constant.

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------
# Python has built-in tools that allow for a very efficient approach to this problem
# What's the best time you can accomplish?  There are no restrictions on techniques or data
# structures, but you may not import any additional libraries that you did not write yourself.
f.close()

# `with` closes the file even if reading it raises.
with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

duplicates = []  # Return the list of duplicates in this data structure

# Load names_1 into a binary search tree: the first name is the root and the
# remaining names are inserted below it. This replaces the original nested
# loops that compared every name_1 with every name_2.
node = BSTNode(names_1[0])
for later_name in names_1[1:]:
    node.insert(later_name)

# Record each name of names_2 that the tree contains.
for each in names_2:
    if node.contains(each):
        duplicates.append(each)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# ---------- Stretch Goal -----------