Ejemplo n.º 1
0
    def test_should_retrieve_products_from_query(self, store, query):
        """Check that querying a store returns a plain list of products.

        Clears ``ProductDatabase``, runs ``retrieve_products_from_query`` and
        asserts that the result is exactly a ``list`` holding at least as
        many entries as the database reports afterwards.

        ``store`` and ``query`` are supplied from outside this method
        (presumably pytest parametrization -- the decorator is not visible
        here; confirm).
        """
        ProductDatabase.clear_database()

        found = PageExtractor(store).retrieve_products_from_query(query)

        assert type(found) is list
        # Equality cannot be asserted: the result may contain repeated items
        # that were not inserted into the database.
        assert len(found) >= ProductDatabase.get_products_total()
Ejemplo n.º 2
0
    def test_should_get_info_list_about_products(self, store, query):
        """Check that product info is extracted as a non-empty list of dicts.

        Parses the HTML for ``query`` and feeds it to
        ``get_info_list_about_products``; the result must be exactly a
        ``list`` whose first element is exactly a ``dict``.

        ``store`` and ``query`` are supplied from outside this method
        (presumably pytest parametrization -- the decorator is not visible
        here; confirm).
        """
        extractor = PageExtractor(store)
        parsed_html = extractor.retrieve_html_parsed_from_query(query)
        items_list = extractor.get_info_list_about_products(parsed_html)

        assert type(items_list) is list
        # Fail with a readable assertion rather than an IndexError on
        # items_list[0] when nothing was extracted.
        assert items_list, 'no product info was extracted for the query'
        assert type(items_list[0]) is dict
Ejemplo n.º 3
0
 def test_should_get_info_dict_for_product(self, store, query):
     """Check that a single parsed item is converted into an info dict.

     Parses the HTML for ``query``, takes the first entry of the item list
     and passes it to ``get_info_dict_for_product``. The result must be
     exactly a ``dict`` whose ``'name'`` and ``'link'`` values are not
     ``None``.

     ``store`` and ``query`` are supplied from outside this method
     (presumably pytest parametrization -- the decorator is not visible
     here; confirm).
     """
     extractor = PageExtractor(store)
     parsed_html = extractor.retrieve_html_parsed_from_query(query)
     parsed_list = extractor.get_items_list_from_parsed_html(parsed_html)

     # Validate the container before indexing it, so a wrong type or an
     # empty result is reported as an assertion failure, not an IndexError.
     assert type(parsed_list) is element.ResultSet
     assert parsed_list, 'no items were found in the parsed HTML'

     item = extractor.get_info_dict_for_product(parsed_list[0])
     assert type(item) is dict
     assert item['name'] is not None
     assert item['link'] is not None
Ejemplo n.º 4
0
    def test_should_store_products_on_json(self, store, query):
        """Check that retrieved products are written to a JSON file.

        Clears ``ProductDatabase``, retrieves the products for ``query`` and
        passes them to ``store_products_on_json`` with the file name
        ``'test.json'`` (relative to the current working directory). The
        file must exist afterwards.

        ``store`` and ``query`` are supplied from outside this method
        (presumably pytest parametrization -- the decorator is not visible
        here; confirm).
        """
        import os
        ProductDatabase.clear_database()
        filename = 'test.json'
        # A file left over from an earlier run would make the existence
        # check below pass even if nothing is written this time.
        if os.path.exists(filename):
            os.remove(filename)

        extractor = PageExtractor(store)
        products_list = extractor.retrieve_products_from_query(query)
        extractor.store_products_on_json(products_list, filename)

        assert os.path.exists(filename)
        # Equality cannot be asserted: the result may contain repeated items
        # that were not inserted into the database.
        assert len(products_list) >= ProductDatabase.get_products_total()
Ejemplo n.º 5
0
    def test_should_query_webdriver(self):
        """Check the webdriver query path end to end for one fixed query.

        Queries the ``'submarino'`` store for ``"goblet of fire"`` through
        ``query_webdriver``, wraps every returned attribute set in a
        ``Product`` and stores the products in ``'test.json'``. The products
        returned by ``ProductDatabase.filter(price__gte=700, price__lt=1000)``
        are additionally stored in ``'test_filtered.json'``.

        The test asserts that ``'test.json'`` exists and that the database
        is not empty afterwards (presumably constructing a ``Product``
        registers it in ``ProductDatabase`` -- confirm against ``products``).
        """
        import os
        from products import Product
        ProductDatabase.clear_database()
        filename = 'test.json'
        # A file left over from an earlier run would make the existence
        # check below pass even if nothing is written this time.
        if os.path.exists(filename):
            os.remove(filename)

        extractor = PageExtractor('submarino')
        products_list = extractor.query_webdriver("goblet of fire")
        products = [Product(item_attrs) for item_attrs in products_list]
        extractor.store_products_on_json(products, filename)
        filtered_products = ProductDatabase.filter(price__gte=700,
                                                   price__lt=1000)
        extractor.store_products_on_json(filtered_products,
                                         'test_filtered.json')

        assert os.path.exists(filename)
        # len(products_list) cannot be compared with the database total:
        # add_product on ProductDatabase checks whether the product was
        # already inserted, so duplicates are not counted.
        assert ProductDatabase.get_products_total() != 0
Ejemplo n.º 6
0
 def test_should_get_items_list_from_parsed_html(self, store, query):
     """Check that parsed HTML is split into a result set of tags.

     The value returned by ``get_items_list_from_parsed_html`` must be
     exactly an ``element.ResultSet`` and its first entry exactly an
     ``element.Tag``.

     ``store`` and ``query`` are supplied from outside this method
     (presumably pytest parametrization -- the decorator is not visible
     here; confirm).
     """
     extractor = PageExtractor(store)
     parsed_html = extractor.retrieve_html_parsed_from_query(query)
     parsed_list = extractor.get_items_list_from_parsed_html(parsed_html)
     assert type(parsed_list) is element.ResultSet
     # Fail with a readable assertion rather than an IndexError on
     # parsed_list[0] when the page yielded no items.
     assert parsed_list, 'no items were found in the parsed HTML'
     assert type(parsed_list[0]) is element.Tag
Ejemplo n.º 7
0
 def test_should_retrieve_html_parsed_from_query(self, store, query):
     """Check that a query is fetched and parsed into a BeautifulSoup tree.

     The returned object must be exactly a ``BeautifulSoup`` instance and
     must compare equal to the extractor's ``parsed_html`` attribute.

     ``store`` and ``query`` are supplied from outside this method
     (presumably pytest parametrization -- the decorator is not visible
     here; confirm).
     """
     page = PageExtractor(store)
     soup = page.retrieve_html_parsed_from_query(query)
     assert type(soup) is BeautifulSoup
     assert page.parsed_html == soup
Ejemplo n.º 8
0
 def test_should_get_search_url(self, store, query, expected_url):
     """Check that ``get_search_url(query)`` equals ``expected_url``.

     ``store``, ``query`` and ``expected_url`` are supplied from outside
     this method (presumably pytest parametrization -- the decorator is not
     visible here; confirm).
     """
     search_url = PageExtractor(store).get_search_url(query)
     assert search_url == expected_url
Ejemplo n.º 9
0
 def test_should_get_tag_and_class_for_info(self, store, item,
                                            expected_tuple):
     """Check that ``get_tag_and_class_for_info(item)`` equals
     ``expected_tuple``.

     ``store``, ``item`` and ``expected_tuple`` are supplied from outside
     this method (presumably pytest parametrization -- the decorator is not
     visible here; confirm).
     """
     found = PageExtractor(store).get_tag_and_class_for_info(item)
     assert found == expected_tuple
Ejemplo n.º 10
0
 def test_should_init(self):
     """Check that the constructor argument is exposed as ``store_id``."""
     store_id = 'magazineluiza'
     assert PageExtractor(store_id).store_id == store_id
Ejemplo n.º 11
0
 def test_should_convert_BRL_currency_to_float(self, currency_str,
                                               expected_float):
     """Check that ``convert_BRL_currency_to_float`` maps ``currency_str``
     to ``expected_float``.

     The converter is called on the ``PageExtractor`` class itself, without
     an instance. ``currency_str`` and ``expected_float`` are supplied from
     outside this method (presumably pytest parametrization -- the decorator
     is not visible here; confirm).
     """
     converted = PageExtractor.convert_BRL_currency_to_float(currency_str)
     assert converted == expected_float