def test_should_retrieve_products_from_query(self, store, query):
    """Check that querying ``store`` returns a list of products.

    The product database is cleared before the query, and the length of the
    returned list is compared against the database total afterwards.
    """
    ProductDatabase.clear_database()
    extractor = PageExtractor(store)

    products_list = extractor.retrieve_products_from_query(query)

    assert isinstance(products_list, list)
    # Equality cannot be asserted: the result may contain repeated items
    # that were not inserted in the database.
    assert len(products_list) >= ProductDatabase.get_products_total()
def test_should_get_info_list_about_products(self, store, query):
    """Check that product info is extracted as a non-empty list of dicts.

    The HTML parsed from the query is passed to
    ``get_info_list_about_products`` and only the first entry is inspected.
    """
    extractor = PageExtractor(store)
    parsed_html = extractor.retrieve_html_parsed_from_query(query)

    items_list = extractor.get_info_list_about_products(parsed_html)

    assert isinstance(items_list, list)
    # Fail with a readable assertion instead of an IndexError when the
    # extractor returns no items.
    assert items_list
    assert isinstance(items_list[0], dict)
def test_should_get_info_dict_for_product(self, store, query):
    """Check that a single parsed item is converted to an info dict.

    The first item of the parsed result set is converted with
    ``get_info_dict_for_product``; the resulting dict must carry non-``None``
    ``'name'`` and ``'link'`` entries.
    """
    extractor = PageExtractor(store)
    parsed_html = extractor.retrieve_html_parsed_from_query(query)
    parsed_list = extractor.get_items_list_from_parsed_html(parsed_html)

    # Validate the container before indexing into it, so a wrong type or an
    # empty result is reported as an assertion failure rather than an error.
    assert isinstance(parsed_list, element.ResultSet)
    assert parsed_list

    item = extractor.get_info_dict_for_product(parsed_list[0])

    assert isinstance(item, dict)
    assert item['name'] is not None
    assert item['link'] is not None
def test_should_store_products_on_json(self, store, query):
    """Check that retrieved products are written to a JSON file.

    Products are retrieved for ``query`` from ``store`` and handed to
    ``store_products_on_json``; the test then verifies the file exists.
    """
    import os

    ProductDatabase.clear_database()
    filename = 'test.json'
    # Remove any file left behind by an earlier run; otherwise the existence
    # check below would pass even if this run wrote nothing.
    if os.path.exists(filename):
        os.remove(filename)

    extractor = PageExtractor(store)
    products_list = extractor.retrieve_products_from_query(query)
    extractor.store_products_on_json(products_list, filename)

    assert os.path.exists(filename)
    # Equality cannot be asserted: the result may contain repeated items
    # that were not inserted in the database.
    assert len(products_list) >= ProductDatabase.get_products_total()
def test_should_query_webdriver(self):
    """Check the webdriver query path end to end for the 'submarino' store.

    The raw query results are wrapped in ``Product`` objects and stored as
    JSON; a price-filtered subset from ``ProductDatabase`` is stored in a
    second file. The test verifies that the first file exists and that the
    database is not empty afterwards.
    """
    import os

    from products import Product

    ProductDatabase.clear_database()
    filename = 'test.json'
    # Remove any file left behind by an earlier run; otherwise the existence
    # check below would pass even if this run wrote nothing.
    if os.path.exists(filename):
        os.remove(filename)

    extractor = PageExtractor('submarino')
    products_list = extractor.query_webdriver("goblet of fire")
    # NOTE(review): presumably constructing a Product registers it in
    # ProductDatabase, which the final assertion relies on - confirm.
    products = [Product(item_attrs) for item_attrs in products_list]
    extractor.store_products_on_json(products, filename)

    filtered_products = ProductDatabase.filter(price__gte=700, price__lt=1000)
    extractor.store_products_on_json(filtered_products, 'test_filtered.json')

    assert os.path.exists(filename)
    # The total cannot be compared with len(products_list), since add_product
    # on ProductDatabase checks whether the product was already inserted.
    assert ProductDatabase.get_products_total() != 0
def test_should_get_items_list_from_parsed_html(self, store, query):
    """Check that parsed HTML yields a non-empty result set of tags."""
    extractor = PageExtractor(store)
    parsed_html = extractor.retrieve_html_parsed_from_query(query)

    parsed_list = extractor.get_items_list_from_parsed_html(parsed_html)

    assert isinstance(parsed_list, element.ResultSet)
    # Fail with a readable assertion instead of an IndexError when no items
    # were found.
    assert parsed_list
    assert isinstance(parsed_list[0], element.Tag)
def test_should_retrieve_html_parsed_from_query(self, store, query):
    """Check that a query returns parsed HTML and stores it on the extractor.

    The returned object must be a ``BeautifulSoup`` document and must compare
    equal to the extractor's ``parsed_html`` attribute.
    """
    extractor = PageExtractor(store)

    parsed_html = extractor.retrieve_html_parsed_from_query(query)

    assert isinstance(parsed_html, BeautifulSoup)
    assert extractor.parsed_html == parsed_html
def test_should_get_search_url(self, store, query, expected_url):
    """Check that the search URL built for ``query`` matches ``expected_url``."""
    search_url = PageExtractor(store).get_search_url(query)
    assert search_url == expected_url
def test_should_get_tag_and_class_for_info(self, store, item, expected_tuple):
    """Check the value returned by ``get_tag_and_class_for_info`` for ``item``."""
    page_extractor = PageExtractor(store)
    assert page_extractor.get_tag_and_class_for_info(item) == expected_tuple
def test_should_init(self):
    """Check that the constructor argument is exposed as ``store_id``."""
    assert PageExtractor('magazineluiza').store_id == 'magazineluiza'
def test_should_convert_BRL_currency_to_float(self, currency_str, expected_float):
    """Check the currency-string conversion against ``expected_float``.

    The converter is called on the ``PageExtractor`` class itself, without
    creating an instance.
    """
    converted = PageExtractor.convert_BRL_currency_to_float(currency_str)
    assert converted == expected_float