def main():
    """Match listings against known products and write the matches to results.txt.

    Reads ``data/products.txt`` and ``data/listings.txt`` line by line, passing
    each line to ``Product`` / ``Listing`` respectively. Every listing for
    which ``product_tree.find`` returns a product is attached to that product
    as a payload, and the per-product ``result_output`` strings are then
    written to ``results.txt`` (UTF-8).

    If the products file contains no lines, an empty ``results.txt`` is
    written and the function returns without reading the listings.
    """
    # Build product trees
    # 1. product_tree is the 4-level tree used to split product data for
    #    ranking purposes
    # 2. product_search_tree is the Binary Search tree used to quickly
    #    aggregate the results and output them
    #
    # print is called with a single parenthesised string so the output is the
    # same under the Python 2 print statement and the Python 3 print function.
    print('Constructing trees')
    product_tree = Tree()
    # Stays None until the first product is seen; the first product becomes
    # the root of the search tree, later ones are inserted below it.
    product_search_tree = None
    # The with-block closes the file handle once the products are consumed.
    with open('data/products.txt', 'rU') as products_file:
        for prod in products_file:
            product = Product(prod)
            product_tree.insert(product)
            if product_search_tree is None:
                product_search_tree = BinaryNode(product)
            else:
                product_search_tree.insert(product)

    if product_search_tree is None:
        # No product was inserted, so there is no search tree to attach
        # listings to or to traverse. Still leave an (empty) results file
        # behind instead of failing on an unbound tree.
        with codecs.open('results.txt', 'w', encoding='utf-8'):
            pass
        return

    print('Matching listings')
    with open('data/listings.txt', 'rU') as listings_file:
        # Listings are streamed one line at a time rather than loaded at once.
        for listing_row in listings_file:
            listing = Listing(listing_row)
            match = product_tree.find(listing)
            if match is not None:
                product_search_tree.insert_payload(
                    data=match, payload=listing.original_string)

    print('traversing product tree and writing output')
    with codecs.open('results.txt', 'w', encoding='utf-8') as result_file:
        product_search_tree.traverse_with_action(
            lambda node: result_file.write(node.result_output))
class TestBinarySearchTree(unittest.TestCase):
    """Unit tests for BinaryNode, using both integers and Product records."""

    def setUp(self):
        """Create one tree of integers and one tree of products.

        Both trees start from an argument-less ``BinaryNode()`` and are then
        filled through ``insert``.
        """
        self.numeric_tree = BinaryNode()
        self.product_tree = BinaryNode()
        for value in (10, 20, 15, 5, 2, 7):
            self.numeric_tree.insert(value)

        self.test_product_1 = Product(
            '{"product_name":"Nikon-s6100","manufacturer":"Nikon","model":"S6100","family":"Coolpix","announced-date":"2011-02-08T19:00:00.000-05:00"}')
        self.test_product_2_no_family = Product(
            '{"product_name":"Casio_QV-5000SX","manufacturer":"Casio","model":"QV-5000SX","announced-date":"1998-04-19T20:00:00.000-04:00"}')
        self.test_product_3 = Product(
            '{"product_name":"Casio_Exilim_EX-H20G","manufacturer":"Casio","model":"EX-H20g","family":"Exilim","announced-date":"2010-09-19T20:00:00.000-04:00"}')
        fixtures = (
            self.test_product_1,
            self.test_product_2_no_family,
            self.test_product_3,
        )
        for fixture in fixtures:
            self.product_tree.insert(fixture)

    def test_numeric_tree(self):
        """Check where each inserted integer is expected to sit in the tree."""
        # The first inserted value is expected on the right of the empty root.
        top = self.numeric_tree.right
        self.assertEqual(top.data, 10)

        upper = top.right
        self.assertEqual(upper.data, 20)
        self.assertEqual(upper.left.data, 15)

        lower = top.left
        self.assertEqual(lower.data, 5)
        self.assertEqual(lower.left.data, 2)
        self.assertEqual(lower.right.data, 7)

    def test_product_tree(self):
        """Check where each inserted product is expected to sit in the tree."""
        first = self.product_tree.right
        self.assertEqual(first.data.product_name, 'Nikon-s6100')

        second = first.left
        self.assertEqual(second.data.product_name, 'Casio_QV-5000SX')

        third = second.left
        self.assertEqual(third.data.product_name, 'Casio_Exilim_EX-H20G')

    def test_numeric_lookup(self):
        """lookup returns the node for stored integers and None otherwise."""
        for present in (15, 7, 2):
            self.assertEqual(self.numeric_tree.lookup(present).data, present)
        for absent in (1, 1290410):
            self.assertEqual(self.numeric_tree.lookup(absent), None)

    def test_product_lookup(self):
        """lookup returns the node for stored products and None otherwise."""
        stored = (
            self.test_product_1,
            self.test_product_2_no_family,
            self.test_product_3,
        )
        for product in stored:
            self.assertEqual(self.product_tree.lookup(product).data, product)

        never_inserted = Product('{"product_name":"Fujifilm-AX305","manufacturer":"Fujifilm","model":"AX305","family":"FinePix","announced-date":"2011-02-15T19:00:00.000-05:00"}')
        self.assertEqual(self.product_tree.lookup(never_inserted), None)

    def test_insert_and_pop_payload(self):
        """Payloads are appended in order and removed one call at a time."""
        tree = self.product_tree
        target = self.test_product_3

        tree.insert_payload(target, 'I am the payload')
        self.assertEqual(tree.lookup(target).payload[0], 'I am the payload')

        tree.insert_payload(target, 'Another payload')
        self.assertEqual(len(tree.lookup(target).payload), 2)
        self.assertEqual(tree.lookup(target).payload[1], 'Another payload')

        # Each remove_payload call is expected to shrink the list by one.
        for remaining in (1, 0):
            tree.remove_payload(target)
            self.assertEqual(len(tree.lookup(target).payload), remaining)

    def test_result_output(self):
        """result_output is a JSON line with a payload and empty without one."""
        tree = self.product_tree
        tree.insert_payload(self.test_product_2_no_family, 'payload')

        with_listing = tree.lookup(self.test_product_2_no_family)
        self.assertEqual(
            with_listing.result_output,
            u'{"product_name": "Casio_QV-5000SX", "listings": ["payload"]}\n')

        without_listing = tree.lookup(self.test_product_3)
        self.assertEqual(without_listing.result_output, u'')

    def test_traverse_with_action(self):
        """traverse_with_action invokes the callback once per node."""
        visits = []

        def record_visit(_node):
            visits.append(1)

        self.numeric_tree.traverse_with_action(record_visit)
        # 7 to account for initial null node.
        self.assertEqual(len(visits), 7)