def test_mercado_libre_conection_status(self):
    """Check that the Mercado Libre search URL answers with status 200."""
    query = 'audifonos inalambricos'
    country_code = 'mx'

    search_url = Mercado_Libre.adapt_url(Mercado_Libre, query, country_code)
    # just_status=True asks extract_soup for the status value only.
    status_code = extract_soup(search_url, 0, just_status=True)

    self.assertEqual(status_code, 200)
def test_Ebay_conection_status(self):
    """Check that the Ebay search URL answers with status 200."""
    query = 'audifonos inalambricos'
    country_code = 'mx'

    search_url = Ebay.adapt_url(Ebay, query, country_code)
    # just_status=True asks extract_soup for the status value only.
    status_code = extract_soup(search_url, 0, just_status=True)

    self.assertEqual(status_code, 200)
def test_there_is_soup(self):
    """Check that extract_soup yields a non-None soup for an Amazon search."""
    query = 'audifonos inalambricos'
    country_code = 'mx'

    # NOTE(review): arguments are passed as (country, request) here, while
    # other call sites in this file pass (request, country) -- confirm which
    # order adapt_url expects.
    search_url = Amazon.adapt_url(Amazon, country_code, query)
    soup = extract_soup(search_url, 1, just_soup=True)

    self.assertIsNotNone(soup)
def test_there_is_soup(self):
    """Check that extract_soup yields a non-None soup for a Mercado Libre search."""
    query = 'audifonos inalambricos'
    country_code = 'mx'

    # NOTE(review): arguments are passed as (country, request) here, while
    # other call sites in this file pass (request, country) -- confirm which
    # order adapt_url expects.
    search_url = Mercado_Libre.adapt_url(Mercado_Libre, country_code, query)
    soup = extract_soup(search_url, 1, just_soup=True)

    self.assertIsNotNone(soup)
def test_conection_status(self):
    """Check that the Amazon search URL answers with status 200."""
    query = 'audifonos inalambricos'
    country_code = 'mx'

    search_url = Amazon.adapt_url(Amazon, query, country_code)
    # just_status=True asks extract_soup for the status value only.
    status_code = extract_soup(search_url, 0, just_status=True)

    self.assertEqual(status_code, 200)
def test_there_is_soup(self):
    """Check that extract_soup yields a non-None soup for an Ebay search."""
    query = 'audifonos inalambricos'
    country_code = 'mx'

    search_url = Ebay.adapt_url(Ebay, query, country_code)
    soup = extract_soup(search_url, 1, just_soup=True)

    self.assertIsNotNone(soup)
def test_conection_status(self):
    """Build the Amazon search URL by hand and expect status 200 for it."""
    query = 'audifonos inalambricos'
    encoded_query = query.replace(' ', Amazon.space_replacer)

    # Fill the two placeholders of the URL template: the first with the
    # '.mx' suffix, the second with the encoded search text.
    search_url = (
        Amazon.url
        .replace(Amazon.url_replacers[0], '.mx')
        .replace(Amazon.url_replacers[1], encoded_query)
    )

    status_code = extract_soup(search_url, 0, just_status=True)
    self.assertEqual(status_code, 200)
def test_mercado_libre_conection_status(self):
    """Build the Mercado Libre search URL by hand and expect status 200."""
    query = 'audifonos inalambricos'
    encoded_query = query.replace(' ', Mercado_Libre.space_replacer)

    # Fill the two placeholders of the URL template: the first with the
    # '.mx' suffix, the second with the encoded search text.
    search_url = (
        Mercado_Libre.url
        .replace(Mercado_Libre.url_replacers[0], '.mx')
        .replace(Mercado_Libre.url_replacers[1], encoded_query)
    )

    status_code = extract_soup(search_url, 0, just_status=True)
    self.assertEqual(status_code, 200)
def test_get_brute_info_including_Nones(self):
    """Check that search_boxes finds exactly 60 boxes in an Amazon search."""
    query = 'audifonos inalambricos'
    country_code = 'mx'
    search_url = Amazon.adapt_url(Amazon, country_code, query)
    soup = extract_soup(search_url, 1, just_soup=True)

    # Step under test: split the soup into product boxes.
    boxes = search_boxes(soup, Amazon.boxes)

    self.assertEqual(len(boxes), 60)
def test_conection_status(self):
    """Check that every page in self.Pages answers its search URL with 200."""
    query = 'audifonos inalambricos'
    country_code = 'mx'

    for store in self.Pages:
        search_url = store.adapt_url(store, query, country_code)

        # Step under test: request only the status of the page.
        status_code = extract_soup(search_url, 0, just_status=True)
        self.assertEqual(status_code, 200)
def test_extract_soup(self):
    """Check that extract_soup yields a non-None soup for every page in self.Pages."""
    query = 'audifonos inalambricos'
    country_code = 'mx'

    for store in self.Pages:
        search_url = store.adapt_url(store, query, country_code)

        # Step under test: request only the soup of the page.
        soup = extract_soup(search_url, 1, just_soup=True)
        self.assertIsNotNone(soup)
def test_get_brute_info_including_Nones(self):
    """Check that search_boxes returns a result for an Ebay search page."""
    user_request = 'audifonos inalambricos'
    country = 'mx'
    ebay_url = Ebay.adapt_url(Ebay, user_request, country)
    ebay_soup = extract_soup(ebay_url, 1, just_soup=True)

    # Step under test: split the soup into product boxes.
    ebay_boxes = search_boxes(ebay_soup, Ebay.boxes)

    # Assert on the boxes themselves: len() never returns None, so
    # assertIsNotNone(len(...)) could not fail as an assertion.
    # NOTE(review): if an empty result should fail the test as well,
    # tighten this to assertGreater(len(ebay_boxes), 0).
    self.assertIsNotNone(ebay_boxes)
def test_get_brute_info_without_losses(self):
    """Check that get_brute_info returns one entry per Amazon product box."""
    query = 'audifonos inalambricos'
    country_code = 'mx'
    search_url = Amazon.adapt_url(Amazon, country_code, query)
    soup = extract_soup(search_url, 1, just_soup=True)
    boxes = search_boxes(soup, Amazon.boxes)

    # Step under test: pull the raw star info out of every box.
    raw_stars = get_brute_info(boxes, Amazon.stars)

    box_count = len(boxes)
    self.assertEqual(box_count, len(raw_stars))
def test_get_product_boxes(self):
    """Check that search_boxes returns a result for every page in self.Pages."""
    user_request = 'audifonos inalambricos'
    country = 'mx'

    for Page in self.Pages:
        page_url = Page.adapt_url(Page, user_request, country)
        page_soup = extract_soup(page_url, 1, just_soup=True)

        # Step under test: split the soup into product boxes.
        page_boxes = search_boxes(page_soup, Page.boxes)

        # Assert on the boxes themselves: len() never returns None, so
        # assertIsNotNone(len(...)) could not fail as an assertion.
        # NOTE(review): if an empty result should fail the test as well,
        # tighten this to assertGreater(len(page_boxes), 0).
        self.assertIsNotNone(page_boxes)
def test_get_brute_info_including_Nones(self):
    """Check that search_boxes returns a result for a Mercado Libre search page."""
    user_request = 'audifonos inalambricos'
    country = 'mx'
    ml_url = Mercado_Libre.adapt_url(Mercado_Libre, user_request, country)
    ml_soup = extract_soup(ml_url, 1, just_soup=True)

    # Step under test: split the soup into product boxes.
    ml_boxes = search_boxes(ml_soup, Mercado_Libre.boxes)

    # Assert on the boxes themselves: len() never returns None, so
    # assertIsNotNone(len(...)) could not fail as an assertion.
    # NOTE(review): if an empty result should fail the test as well,
    # tighten this to assertGreater(len(ml_boxes), 0).
    self.assertIsNotNone(ml_boxes)
def request_products(user_request, Page, header, home=False, country='mx'):
    """Request a store's search page and collect the product data found on it.

    Args:
        user_request: Search text; forwarded to ``Page.adapt_url`` and echoed
            under ``'product'`` in the failure result.
        Page: Store descriptor. This function calls ``Page.adapt_url``, reads
            ``Page.boxes``, ``Page.name`` and ``Page.index``, and hands the
            object to the ``get_*`` helpers.
        header: Forwarded as the second argument of the first
            ``extract_soup`` call.
        home: Forwarded to ``get_price``.
        country: Forwarded to ``Page.adapt_url`` and ``get_price``.

    Returns:
        dict: When the final status is 200, the keys ``'names'``,
        ``'images'``, ``'urls'``, ``'prices'`` and ``'status'``. For any
        other status, the keys ``'store'``, ``'idx'``, ``'product'``, empty
        lists for ``'names'``/``'images'``/``'urls'``/``'prices'``, and
        ``'status'``.
    """
    page_url = Page.adapt_url(Page, user_request, country)
    # All the HTML of the page
    page_soup, status = extract_soup(page_url, header)

    # Wait until the info is received or the request is denied. Once the
    # status stops being 503 the result is handled below; previously the
    # function fell out of this branch and returned None.
    while status == 503:
        time.sleep(1)
        # NOTE(review): the retry does not forward `header`, unlike the
        # first request -- confirm whether that is intentional.
        page_soup, status = extract_soup(page_url)

    if status != 200:
        # With empty values (not None) the caller knows nothing is to be
        # uploaded. A None would instead read as "a product box without
        # info", which is something that occurs on Amazon.
        return {
            'store': Page.name,
            'idx': Page.index,
            'product': user_request,
            'names': [],
            'images': [],
            'urls': [],
            'prices': [],
            'status': status,
        }

    # HTML divided by products, and stored as elements of an array
    page_boxes = search_boxes(page_soup, Page.boxes)

    # Obtain the info of the products
    return {
        'names': get_names(page_boxes, Page),
        'images': get_images(page_boxes, Page),
        'urls': get_products_urls(page_boxes, Page),
        'prices': get_price(country, page_boxes, Page, home),
        'status': status,
    }
def test_products_info_getters(self):
    """Check that each Ebay getter returns one entry per product box."""
    query = 'audifonos inalambricos'
    country_code = 'mx'
    search_url = Ebay.adapt_url(Ebay, query, country_code)
    soup = extract_soup(search_url, 1, just_soup=True)
    boxes = search_boxes(soup, Ebay.boxes)

    result_sizes = {
        'ebay_names': len(get_names(boxes, Ebay.name_and_images)),
        'ebay_images': len(get_images(boxes, Ebay)),
        'ebay_urls': len(get_products_urls(boxes, Ebay)),
        # The price getter check is currently disabled:
        # 'ebay_price' : len(get_price(country, ebay_boxes, Ebay.price)),
    }

    for size in result_sizes.values():
        self.assertEqual(len(boxes), size)
def test_products_info_getters(self):
    """Check that, for every page in self.Pages, each getter returns one entry per box."""
    query = 'audifonos inalambricos'
    country_code = 'mx'

    for store in self.Pages:
        search_url = store.adapt_url(store, query, country_code)
        soup = extract_soup(search_url, 1, just_soup=True)
        boxes = search_boxes(soup, store.boxes)

        # Steps under test: sizes of what each getter returns, collected in
        # the order names, images, urls, prices.
        result_sizes = [
            len(get_names(boxes, store)),
            len(get_images(boxes, store)),
            len(get_products_urls(boxes, store)),
            len(get_price(country_code, boxes, store)),
        ]

        for size in result_sizes:
            self.assertEqual(len(boxes), size)
def test_cheapest_gets_info(self):
    """Check that no field of the cheapest Mercado Libre product is None."""
    query = 'audifonos inalambricos'
    country_code = 'mx'
    search_url = Mercado_Libre.adapt_url(Mercado_Libre, query, country_code)
    soup = extract_soup(search_url, 1, just_soup=True)
    boxes = search_boxes(soup, Mercado_Libre.boxes)

    prices = get_price(country_code, boxes, Mercado_Libre.price)
    # position_and_price=True makes cheapest return a pair, unpacked here.
    best_idx, best_price = cheapest(prices, position_and_price=True)
    best_product = get_cheapest(
        best_idx, boxes, best_price, country_code, Mercado_Libre)

    for field in best_product:
        self.assertIsNotNone(best_product[field])
def test_products_info_getters(self):
    """Check that each Mercado Libre getter returns one entry per product box."""
    query = 'audifonos inalambricos'
    country_code = 'mx'
    search_url = Mercado_Libre.adapt_url(Mercado_Libre, query, country_code)
    soup = extract_soup(search_url, 1, just_soup=True)
    boxes = search_boxes(soup, Mercado_Libre.boxes)

    result_sizes = {
        'ml_names': len(get_names(boxes, Mercado_Libre.name_and_images)),
        'ml_images': len(get_images(boxes, Mercado_Libre)),
        'ml_urls': len(get_products_urls(boxes, Mercado_Libre)),
        'ml_price': len(get_price(country_code, boxes, Mercado_Libre.price)),
    }

    for size in result_sizes.values():
        self.assertEqual(len(boxes), size)
def test_cheapest_gets_info(self):
    """Check that no field of the cheapest Ebay product is None."""
    query = 'audifonos inalambricos'
    country_code = 'mx'
    search_url = Ebay.adapt_url(Ebay, query, country_code)
    soup = extract_soup(search_url, 1, just_soup=True)
    boxes = search_boxes(soup, Ebay.boxes)

    prices = get_price(country_code, boxes, Ebay.price)
    # position_and_price=True makes cheapest return a pair, unpacked here.
    best_idx, best_price = cheapest(prices, position_and_price=True)
    best_product = get_cheapest(
        best_idx, boxes, best_price, country_code, Ebay)

    for field in best_product:
        self.assertIsNotNone(best_product[field])
def test_products_info_getters(self):
    """Check that each Amazon getter returns one entry per product box."""
    user_request = 'audifonos inalambricos'
    country = 'mx'
    amazon_url = Amazon.adapt_url(Amazon, country, user_request)
    amazon_soup = extract_soup(amazon_url, 1, just_soup=True)
    amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)

    amazon_names = len(get_names(amazon_boxes, Amazon.name_and_images))
    amazon_images = len(get_images(amazon_boxes, Amazon.name_and_images))
    amazon_urls = len(get_products_urls(amazon_boxes, Amazon.product_urls))
    amazon_price = len(get_price(country, amazon_boxes, Amazon.price))
    amazon_ids = len(amazon_products_id(amazon_boxes))
    amazon_reviews = len(get_reviews(country, amazon_boxes, Amazon.reviews))
    amazon_stars = len(get_stars(country, amazon_boxes, Amazon.stars))

    trials = [
        amazon_names, amazon_images, amazon_urls, amazon_price,
        amazon_ids, amazon_reviews, amazon_stars,
    ]

    for test in trials:
        # assertEqual replaces the deprecated alias assertEquals, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(len(amazon_boxes), test)
def scraper(Page, user_request, country):
    """Return what get_cheapest builds for the cheapest product found on Page.

    Steps: build the search URL, fetch its soup, split it into product
    boxes, read the prices, pick the cheapest one and hand everything to
    get_cheapest.
    """
    # Adapt the url
    search_url = Page.adapt_url(Page, country, user_request)

    # All the HTML of the page
    page_soup = extract_soup(search_url, 1, just_soup=True)

    # HTML divided by products, and stored as elements of an array
    product_boxes = search_boxes(page_soup, Page.boxes)

    # From this part, could get better AFTER the 4 scrapers are made
    # From the boxes, obtain the prices
    all_prices = get_price(country, product_boxes, Page.price)

    # Position and price of the cheapest entry, then the product built from
    # them (a dictionary, per the original comment).
    best_idx, best_price = cheapest(all_prices, position_and_price=True)
    return get_cheapest(best_idx, product_boxes, best_price, country, Page)
def scraper(user_request, country):
    """Return what get_cheapest builds for the cheapest Mercado Libre product.

    Steps: build the search URL, fetch its soup, split it into product
    boxes, read the prices, pick the cheapest one and hand everything to
    get_cheapest.
    """
    # Adapt the url
    search_url = Mercado_Libre.adapt_url(Mercado_Libre, country, user_request)

    # All the HTML of the page
    page_soup = extract_soup(search_url, 1, just_soup=True)

    # HTML divided by products, and stored as elements of an array
    product_boxes = search_boxes(page_soup, Mercado_Libre.boxes)

    # From this part, could get better AFTER the 4 scrapers are made
    # From the boxes, obtain the prices
    all_prices = get_price(country, product_boxes, Mercado_Libre.price)

    # Position and price of the cheapest entry, then the product built from
    # them (a dictionary, per the original comment).
    best_idx, best_price = cheapest(all_prices, position_and_price=True)
    return get_cheapest(
        best_idx, product_boxes, best_price, country, Mercado_Libre)
def scraper(user_request, country):
    """Return what get_cheapest builds for the cheapest Amazon product.

    Steps: build the search URL, fetch its soup, split it into product
    boxes, read the prices, pick the cheapest one and hand everything to
    get_cheapest.
    """
    # Adapt the url
    search_url = Amazon.adapt_url(Amazon, user_request, country)

    # All the HTML of the page
    page_soup = extract_soup(search_url, 1, just_soup=True)

    # HTML divided by products, and stored as elements of an array
    product_boxes = search_boxes(page_soup, Amazon.boxes)

    # From this part, could get better AFTER the 4 scrapers are made
    # From the boxes, obtain the prices
    all_prices = get_price(country, product_boxes, Amazon.price)

    # Position and price of the cheapest entry, then the product built from
    # them (a dictionary, per the original comment).
    best_idx, best_price = cheapest(all_prices, position_and_price=True)
    return get_cheapest(best_idx, product_boxes, best_price, country, Amazon)
#Obtain the cheapest from prices and then, you obtain the cheapest product as a dictionary cheapest_idx, cheapest_price = cheapest(prices, position_and_price=True) cheapest_product_dictionary = get_cheapest(cheapest_idx, boxes, cheapest_price, country, Page) return cheapest_product_dictionary if __name__ == "__main__": user_request = 'audifonos inalambricos' country = 'mx' ebay_url = Ebay.adapt_url(Ebay, user_request, country) #All the HTML of the page ebay_soup = extract_soup(ebay_url, 1, just_soup=True) # #HTML divided by products, and stored as elements of an array ebay_boxes = search_boxes(ebay_soup, Ebay.boxes) # print(ebay_boxes) ebay_products = {} ebay_products['names'] = get_names(ebay_boxes, Ebay.name_and_images) # #Ebay's images source (link) ebay_products['images'] = get_images(ebay_boxes, Ebay) ebay_products['urls'] = get_products_urls(ebay_boxes, Ebay) ebay_products['prices'] = get_price(country, ebay_boxes, Ebay.price) cheapest_idx = cheapest(ebay_products['prices'])
cheapest_amazon_product_dictionary = get_cheapest(amazon_cheapest_idx, amazon_boxes, amazon_cheapest_price, country, Amazon) return cheapest_amazon_product_dictionary if __name__ == "__main__": user_request = 'audifonos inalambricos' country = 'mx' amazon_url = Amazon.adapt_url(Amazon, user_request, country) #All the HTML of the page amazon_soup = extract_soup(amazon_url, 1, just_soup=True) #HTML divided by products, and stored as elements of an array amazon_boxes = search_boxes(amazon_soup, Amazon.boxes) amazon_products = {} amazon_products['name'] = get_names(amazon_boxes, Amazon.name_and_images) '''Amazon's images source (link)''' amazon_products['image'] = get_images(amazon_boxes, Amazon) amazon_products['url'] = get_products_urls(amazon_boxes, Amazon) '''Just Amazon's products id. Is used as a url generator: amazon's url + domain + "/dp/" + product_id''' # amazon_products['id']= amazon_products_id(amazon_boxes) '''Just stars as float''' amazon_products['star'] = get_stars(country, amazon_boxes, Amazon.stars)
from scrape_data import Mercado_Libre, Products
from scrape_funcs import extract_soup, search_boxes, get_brute_info
from data_filters import get_names, get_images, get_products_urls, get_price
from general_funcs import cheapest, get_cheapest

# Script: scrape one Mercado Libre search page, collect the product data and
# print every field of the cheapest product.
user_request = 'audifonos inalambricos'
country = 'mx'

ml_url = Mercado_Libre.adapt_url(Mercado_Libre, country, user_request)

# All the HTML of the page
ml_soup = extract_soup(ml_url, 1, just_soup=True)

# HTML divided by products, and stored as elements of an array
ml_boxes = search_boxes(ml_soup, Mercado_Libre.boxes)

ml_products = {}
ml_products['names'] = get_names(ml_boxes, Mercado_Libre.name_and_images)
# Mercado_Libre's images source (link)
ml_products['images'] = get_images(ml_boxes, Mercado_Libre.name_and_images)
ml_products['urls'] = get_products_urls(ml_boxes, Mercado_Libre.product_urls)
ml_products['prices'] = get_price(country, ml_boxes, Mercado_Libre.price)

# Keep the result under its own name so the imported `cheapest` function is
# not shadowed and stays callable afterwards.
cheapest_idx = cheapest(ml_products['prices'])
cheapest_ml_product = get_cheapest(cheapest_idx, ml_products)

for key in cheapest_ml_product:
    print(key, ':', cheapest_ml_product[key])