def test_products_info_getters(self):
    """Check that each eBay getter yields one entry per product box.

    Builds the eBay search URL for a fixed query, extracts the product
    boxes and asserts that the names, images and URLs getters each return
    as many items as there are boxes.
    """
    query = 'audifonos inalambricos'
    country = 'mx'

    url = Ebay.adapt_url(Ebay, query, country)
    soup = extract_soup(url, 1, just_soup=True)
    boxes = search_boxes(soup, Ebay.boxes)

    # The price getter is currently left out of this check for eBay.
    counts = [
        len(get_names(boxes, Ebay.name_and_images)),
        len(get_images(boxes, Ebay)),
        len(get_products_urls(boxes, Ebay)),
    ]

    expected = len(boxes)
    for count in counts:
        self.assertEqual(expected, count)
def test_products_info_getters(self):
    """Check that every getter yields one entry per product box, per page.

    For each page class in ``self.Pages`` the search URL is built for a
    fixed query, the product boxes are extracted, and the names, images,
    URLs and prices getters must each return as many items as there are
    boxes.
    """
    query = 'audifonos inalambricos'
    country = 'mx'

    for page_cls in self.Pages:
        url = page_cls.adapt_url(page_cls, query, country)
        soup = extract_soup(url, 1, just_soup=True)
        boxes = search_boxes(soup, page_cls.boxes)

        # All getters are evaluated first; only then are the counts compared.
        counts = (
            len(get_names(boxes, page_cls)),
            len(get_images(boxes, page_cls)),
            len(get_products_urls(boxes, page_cls)),
            len(get_price(country, boxes, page_cls)),
        )
        for count in counts:
            self.assertEqual(len(boxes), count)
def test_products_info_getters(self):
    """Check that each Mercado Libre getter yields one entry per product box.

    Builds the Mercado Libre search URL for a fixed query, extracts the
    product boxes and asserts that the names, images, URLs and prices
    getters each return as many items as there are boxes.
    """
    query = 'audifonos inalambricos'
    country = 'mx'

    url = Mercado_Libre.adapt_url(Mercado_Libre, query, country)
    soup = extract_soup(url, 1, just_soup=True)
    boxes = search_boxes(soup, Mercado_Libre.boxes)

    counts = [
        len(get_names(boxes, Mercado_Libre.name_and_images)),
        len(get_images(boxes, Mercado_Libre)),
        len(get_products_urls(boxes, Mercado_Libre)),
        len(get_price(country, boxes, Mercado_Libre.price)),
    ]

    expected = len(boxes)
    for count in counts:
        self.assertEqual(expected, count)
def get_cheapest(cheapest_idx, products, cheapest_price=None, country=None, Page=None):
    """Build a dictionary describing the product at ``cheapest_idx``.

    ``products`` may be either:

    * a ``bs4.element.ResultSet`` holding the product boxes. ``country`` and
      ``Page`` are then required, and the name, image and URL are extracted
      from the box at ``cheapest_idx`` through the module's getters.
    * a dictionary (or dict subclass) mapping each field to a sequence of
      values. The value at ``cheapest_idx`` is taken from every sequence.

    Args:
        cheapest_idx: Position of the cheapest product.
        products: The product boxes or the preloaded dictionary.
        cheapest_price: Price stored under ``'price'`` in the ResultSet case.
            It is copied as given (``None`` when omitted).
        country: Required in the ResultSet case; only checked for presence.
        Page: Required in the ResultSet case; must expose
            ``name_and_images`` and ``product_urls``.

    Returns:
        A dictionary with the data of the selected product. In the ResultSet
        case the keys are ``'name'``, ``'image'``, ``'url'`` and ``'price'``;
        in the dictionary case they are the keys of ``products``.

    Raises:
        ValueError: If ``products`` is neither a dict nor a ResultSet, or if
            ``country`` and/or ``Page`` are missing in the ResultSet case.
    """
    # The ResultSet check is done by type name so that this function does not
    # need a hard dependency on bs4 being importable here.
    if str(type(products)) == "<class 'bs4.element.ResultSet'>":
        # Report the most specific problem; the combined message must be
        # checked first, otherwise it can never be reached.
        if not country and not Page:
            raise ValueError(
                "Missing Page and country values. You can use this function "
                "with a dictionary with all the data preloaded as well"
            )
        if not country:
            raise ValueError("Missing country value")
        if not Page:
            raise ValueError("Missing Page object")

        return {
            'name': get_names(products, Page.name_and_images, position=cheapest_idx),
            'image': get_images(products, Page, position=cheapest_idx),
            'url': get_products_urls(products, Page.product_urls, position=cheapest_idx),
            'price': cheapest_price,
        }

    if isinstance(products, dict):
        return {key: values[cheapest_idx] for key, values in products.items()}

    error_in_product_type = f"Products type must be a dict or a bs4.element.ResultSet with the boxes. Recived {type(products)}"
    raise ValueError(error_in_product_type)
def test_products_info_getters(self):
    """Check that each Amazon getter yields one entry per product box.

    Builds the Amazon search URL for a fixed query, extracts the product
    boxes and asserts that the names, images, URLs, prices, reviews and
    stars getters each return as many items as there are boxes.
    """
    query = 'audifonos inalambricos'
    country = 'mx'

    url = Amazon.adapt_url(Amazon, query, country)
    soup = extract_soup(url, 1, just_soup=True)
    boxes = search_boxes(soup, Amazon.boxes)

    # All getters are evaluated first; only then are the counts compared.
    counts = (
        len(get_names(boxes, Amazon.name_and_images)),
        len(get_images(boxes, Amazon)),
        len(get_products_urls(boxes, Amazon)),
        len(get_price(country, boxes, Amazon.price)),
        len(get_reviews(country, boxes, Amazon.reviews)),
        len(get_stars(country, boxes, Amazon.stars)),
    )

    for count in counts:
        self.assertEqual(len(boxes), count)
def request_products(user_request, Page, header, home=False, country='mx'):
    """Request a store page and collect the product data found on it.

    Args:
        user_request: The product search text.
        Page: Store class exposing ``adapt_url``, ``boxes``, ``name`` and
            ``index``.
        header: Passed to ``extract_soup`` on every request.
        home: Forwarded to ``get_price``.
        country: Country code used to build the URL and read the prices.

    Returns:
        A dictionary. When the final status is 200 it holds ``'names'``,
        ``'images'``, ``'urls'``, ``'prices'`` and ``'status'``. For any other
        status it holds ``'store'``, ``'idx'``, ``'product'``, empty lists for
        the four product fields, and ``'status'``.
    """
    page_url = Page.adapt_url(Page, user_request, country)

    # All the HTML of the page
    page_soup, status = extract_soup(page_url, header)

    # Wait until the info is received or the request is denied. The status is
    # evaluated again after the loop, so a retry that succeeds is processed
    # like any other response instead of falling through and returning None.
    # NOTE(review): retries are unbounded, as in the original implementation.
    while status == 503:
        time.sleep(1)
        page_soup, status = extract_soup(page_url, header)

    if status == 200:
        # HTML divided by products, and stored as elements of an array
        page_boxes = search_boxes(page_soup, Page.boxes)

        # Obtain the info of the products
        return {
            'names': get_names(page_boxes, Page),
            'images': get_images(page_boxes, Page),
            'urls': get_products_urls(page_boxes, Page),
            'prices': get_price(country, page_boxes, Page, home),
            'status': status,
        }

    # With empty values (not None) the script knows that this result won't be
    # uploaded. A single None would instead be read as a product box without
    # info, something that occurs on Amazon.
    return {
        'store': Page.name,
        'idx': Page.index,
        'product': user_request,
        'names': [],
        'images': [],
        'urls': [],
        'prices': [],
        'status': status,
    }
if __name__ == "__main__":
    # Manual check: request an Amazon search and collect every product field.
    user_request = 'audifonos inalambricos'
    country = 'mx'

    amazon_url = Amazon.adapt_url(Amazon, user_request, country)

    # All the HTML of the page
    amazon_soup = extract_soup(amazon_url, 1, just_soup=True)

    # HTML divided by products, and stored as elements of an array
    amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)

    # The product-id extraction (usable as a URL generator:
    # amazon's url + domain + "/dp/" + product_id) is currently disabled.
    amazon_products = {
        'name': get_names(amazon_boxes, Amazon.name_and_images),
        # Amazon's images source (link)
        'image': get_images(amazon_boxes, Amazon),
        'url': get_products_urls(amazon_boxes, Amazon),
        # Just stars as float
        'star': get_stars(country, amazon_boxes, Amazon.stars),
        # Just number of reviews as int
        'review': get_reviews(country, amazon_boxes, Amazon.reviews),
        'price': get_price(country, amazon_boxes, Amazon.price),
    }
return cheapest_product_dictionary if __name__ == "__main__": user_request = 'audifonos inalambricos' country = 'mx' ebay_url = Ebay.adapt_url(Ebay, user_request, country) #All the HTML of the page ebay_soup = extract_soup(ebay_url, 1, just_soup=True) # #HTML divided by products, and stored as elements of an array ebay_boxes = search_boxes(ebay_soup, Ebay.boxes) # print(ebay_boxes) ebay_products = {} ebay_products['names'] = get_names(ebay_boxes, Ebay.name_and_images) # #Ebay's images source (link) ebay_products['images'] = get_images(ebay_boxes, Ebay) ebay_products['urls'] = get_products_urls(ebay_boxes, Ebay) ebay_products['prices'] = get_price(country, ebay_boxes, Ebay.price) cheapest_idx = cheapest(ebay_products['prices']) cheapest_ebay_product2 = get_cheapest(cheapest_idx, ebay_products) print(f'\nTest ONE:') for key in cheapest_ebay_product2: print(key, ':', cheapest_ebay_product2[key])
ml_soup = extract_soup(ml_url, 0, just_soup=True)

# HTML divided by products, and stored as elements of an array
ml_boxes = search_boxes(ml_soup, Mercado_Libre.boxes)

# Test ONE: pick the cheapest product straight from the boxes.
print('Test ONE:')
meli_prices = get_price(country, ml_boxes, Mercado_Libre.price)
meli_cheapest_idx, meli_cheapest_price = cheapest(
    meli_prices,
    position_and_price=True,
)
cheapest_ml_product_1 = get_cheapest(
    meli_cheapest_idx,
    ml_boxes,
    meli_cheapest_price,
    country,
    Mercado_Libre,
)
for key, value in cheapest_ml_product_1.items():
    print(key, ':', value)

# Test TWO: preload every field into a dictionary, then pick the cheapest.
ml_products = {
    'names': get_names(ml_boxes, Mercado_Libre.name_and_images),
    # Mercado_Libre's images source (link)
    'images': get_images(ml_boxes, Mercado_Libre),
    'urls': get_products_urls(ml_boxes, Mercado_Libre),
    'prices': get_price(country, ml_boxes, Mercado_Libre.price),
}

cheapest_idx = cheapest(ml_products['prices'])
cheapest_ml_product2 = get_cheapest(cheapest_idx, ml_products)

print('\n\nTest TWO:')
for key, value in cheapest_ml_product2.items():
    print(key, ':', value)