예제 #1
0
파일: main.py 프로젝트: aaronlevin/sortable
def main():
    """Match listings to products and write the aggregated results.

    Reads one product per line from ``data/products.txt`` and one listing per
    line from ``data/listings.txt``. Every product is inserted into two
    structures:

    1. ``product_tree`` - the 4-level tree used to split product data for
       ranking purposes (queried with ``find`` for each listing).
    2. ``product_search_tree`` - the binary search tree used to quickly
       aggregate the results and output them.

    Each listing that ``product_tree.find`` resolves to a match has its
    ``original_string`` attached to that match in the search tree. Finally the
    search tree is traversed and every node's ``result_output`` is written to
    ``results.txt`` (UTF-8). If the products file is empty, ``results.txt`` is
    still created but left empty.
    """
    # Both inputs are opened in one ``with`` so they are closed even if
    # parsing raises part-way through.
    with open('data/listings.txt', 'rU') as listings_file, \
            open('data/products.txt', 'rU') as products_file:

        print('Constructing trees')
        product_tree = Tree()
        # Stays None when the products file has no rows; the first product
        # becomes the root of the search tree.
        product_search_tree = None
        for product_row in products_file:
            product = Product(product_row)
            product_tree.insert(product)
            if product_search_tree is None:
                product_search_tree = BinaryNode(product)
            else:
                product_search_tree.insert(product)

        print('Matching listings')
        for listing_row in listings_file:
            listing = Listing(listing_row)
            match = product_tree.find(listing)
            if match is not None:
                product_search_tree.insert_payload(data=match, payload=listing.original_string)

    print('traversing product tree and writing output')
    with codecs.open('results.txt', 'w', encoding='utf-8') as result_file:
        # With no products there is nothing to traverse; the (empty) output
        # file is still produced so downstream steps find it.
        if product_search_tree is not None:
            product_search_tree.traverse_with_action(lambda node: result_file.write(node.result_output))
예제 #2
0
class TestBinarySearchTree(unittest.TestCase):
    """Exercise BinaryNode with plain integers and with Product objects.

    Both fixtures start from an argument-less ``BinaryNode()``; the assertions
    below expect the first inserted value to appear as that node's ``right``
    child.
    """

    def setUp(self):
        # Fresh trees for every test: one holding integers, one holding products.
        self.numeric_tree = BinaryNode()
        self.product_tree = BinaryNode()

        for value in (10, 20, 15, 5, 2, 7):
            self.numeric_tree.insert(value)

        self.test_product_1 = Product('{"product_name":"Nikon-s6100","manufacturer":"Nikon","model":"S6100","family":"Coolpix","announced-date":"2011-02-08T19:00:00.000-05:00"}')
        self.test_product_2_no_family = Product('{"product_name":"Casio_QV-5000SX","manufacturer":"Casio","model":"QV-5000SX","announced-date":"1998-04-19T20:00:00.000-04:00"}')
        self.test_product_3 = Product('{"product_name":"Casio_Exilim_EX-H20G","manufacturer":"Casio","model":"EX-H20g","family":"Exilim","announced-date":"2010-09-19T20:00:00.000-04:00"}')

        for product in (self.test_product_1,
                        self.test_product_2_no_family,
                        self.test_product_3):
            self.product_tree.insert(product)

    def test_numeric_tree(self):
        # Walk the expected shape: 10 at the top, 20/15 on its right side,
        # 5 with children 2 and 7 on its left side.
        top = self.numeric_tree.right
        self.assertEqual(top.data, 10)

        upper = top.right
        self.assertEqual(upper.data, 20)
        self.assertEqual(upper.left.data, 15)

        lower = top.left
        self.assertEqual(lower.data, 5)
        self.assertEqual(lower.left.data, 2)
        self.assertEqual(lower.right.data, 7)

    def test_product_tree(self):
        # Each product is expected one level further down the left spine.
        first = self.product_tree.right
        self.assertEqual(first.data.product_name, 'Nikon-s6100')

        second = first.left
        self.assertEqual(second.data.product_name, 'Casio_QV-5000SX')

        third = second.left
        self.assertEqual(third.data.product_name, 'Casio_Exilim_EX-H20G')

    def test_numeric_lookup(self):
        # Values that were inserted are found; values that were not yield None.
        for present in (15, 7, 2):
            self.assertEqual(self.numeric_tree.lookup(present).data, present)
        for absent in (1, 1290410):
            self.assertEqual(self.numeric_tree.lookup(absent), None)

    def test_product_lookup(self):
        # Every inserted product is found by lookup ...
        for product in (self.test_product_1,
                        self.test_product_2_no_family,
                        self.test_product_3):
            self.assertEqual(self.product_tree.lookup(product).data, product)

        # ... while a product that was never inserted is not.
        unknown = Product('{"product_name":"Fujifilm-AX305","manufacturer":"Fujifilm","model":"AX305","family":"FinePix","announced-date":"2011-02-15T19:00:00.000-05:00"}')
        self.assertEqual(self.product_tree.lookup(unknown), None)

    def test_insert_and_pop_payload(self):
        target = self.test_product_3
        tree = self.product_tree

        def current_payload():
            # Look the node up afresh each time, as a caller would.
            return tree.lookup(target).payload

        tree.insert_payload(target, 'I am the payload')
        self.assertEqual(current_payload()[0], 'I am the payload')

        tree.insert_payload(target, 'Another payload')
        self.assertEqual(len(current_payload()), 2)
        self.assertEqual(current_payload()[1], 'Another payload')

        # Each removal is expected to shrink the payload by one entry.
        tree.remove_payload(target)
        self.assertEqual(len(current_payload()), 1)
        tree.remove_payload(target)
        self.assertEqual(len(current_payload()), 0)

    def test_result_output(self):
        with_listing = self.test_product_2_no_family
        self.product_tree.insert_payload(with_listing, 'payload')

        rendered = self.product_tree.lookup(with_listing).result_output
        self.assertEqual(rendered, u'{"product_name": "Casio_QV-5000SX", "listings": ["payload"]}\n')

        # A product without any payload is expected to render as an empty string.
        empty_rendered = self.product_tree.lookup(self.test_product_3).result_output
        self.assertEqual(empty_rendered, u'')

    def test_traverse_with_action(self):
        visits = []
        self.numeric_tree.traverse_with_action(lambda _node: visits.append(1))
        # 6 inserted values plus the initial null node.
        self.assertEqual(len(visits), 7)