def test_there_is_soup(self):
    """Check that an Amazon search page produces a soup object.

    Builds the Amazon search URL for a fixed query and country, requests
    it through ``extract_soup`` with ``just_soup=True`` and asserts that
    the returned value is not ``None``.
    """
    market, query = 'mx', 'audifonos inalambricos'
    search_url = Amazon.adapt_url(Amazon, market, query)

    page_soup = extract_soup(search_url, 1, just_soup=True)

    self.assertIsNotNone(page_soup)
def test_conection_status(self):
    """Check the status reported for an Amazon search request.

    Builds the Amazon search URL for a fixed query and country, calls
    ``extract_soup`` with ``just_status=True`` and asserts that the
    returned value equals 200.
    """
    market, query = 'mx', 'audifonos inalambricos'
    search_url = Amazon.adapt_url(Amazon, market, query)

    status_code = extract_soup(search_url, 0, just_status=True)

    self.assertEqual(status_code, 200)
def test_there_is_soup(self):
    """Check that a Mercado Libre search page produces a soup object.

    Builds the Mercado Libre search URL for a fixed query and country,
    requests it through ``extract_soup`` with ``just_soup=True`` and
    asserts that the returned value is not ``None``.
    """
    market, query = 'mx', 'audifonos inalambricos'
    search_url = Mercado_Libre.adapt_url(Mercado_Libre, market, query)

    page_soup = extract_soup(search_url, 1, just_soup=True)

    self.assertIsNotNone(page_soup)
def test_mercado_libre_conection_status(self):
    """Check the status reported for a Mercado Libre search request.

    Builds the Mercado Libre search URL for a fixed query and country,
    calls ``extract_soup`` with ``just_status=True`` and asserts that the
    returned value equals 200.
    """
    market, query = 'mx', 'audifonos inalambricos'
    search_url = Mercado_Libre.adapt_url(Mercado_Libre, market, query)

    status_code = extract_soup(search_url, 0, just_status=True)

    self.assertEqual(status_code, 200)
def test_get_brute_info_including_Nones(self):
    """Check how many product boxes are found on an Amazon search page.

    Fetches the soup for a fixed Amazon search, splits it into product
    boxes with ``search_boxes`` and asserts that exactly 60 boxes come
    back.

    NOTE(review): despite its name, this test never calls
    ``get_brute_info``; it only exercises ``search_boxes``.
    """
    market, query = 'mx', 'audifonos inalambricos'
    search_url = Amazon.adapt_url(Amazon, market, query)
    page_soup = extract_soup(search_url, 1, just_soup=True)

    # New test
    product_boxes = search_boxes(page_soup, Amazon.boxes)

    self.assertEqual(len(product_boxes), 60)
def test_get_brute_info_without_losses(self):
    """Check that ``get_brute_info`` returns one entry per product box.

    Fetches the soup for a fixed Amazon search, splits it into product
    boxes, extracts the raw star information with ``get_brute_info`` and
    asserts that the result has the same length as the list of boxes.
    """
    market, query = 'mx', 'audifonos inalambricos'
    search_url = Amazon.adapt_url(Amazon, market, query)
    page_soup = extract_soup(search_url, 1, just_soup=True)
    product_boxes = search_boxes(page_soup, Amazon.boxes)

    # New test
    raw_stars = get_brute_info(product_boxes, Amazon.stars)

    self.assertEqual(len(product_boxes), len(raw_stars))
def test_products_info_getters(self):
    """Check that every Amazon getter returns one value per product box.

    Fetches the soup for a fixed Amazon search, splits it into product
    boxes and runs each data getter over them. Every getter result must
    have exactly as many entries as there are boxes.
    """
    user_request = 'audifonos inalambricos'
    country = 'mx'

    amazon_url = Amazon.adapt_url(Amazon, country, user_request)
    amazon_soup = extract_soup(amazon_url, 1, just_soup=True)
    amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)

    # Result length of each getter, keyed by a label that is only used to
    # make a failing assertion say which getter lost or added entries.
    trials = {
        'names': len(get_names(amazon_boxes, Amazon.name_and_images)),
        'images': len(get_images(amazon_boxes, Amazon.name_and_images)),
        'urls': len(get_products_urls(amazon_boxes, Amazon.product_urls)),
        'price': len(get_price(country, amazon_boxes, Amazon.price)),
        'ids': len(amazon_products_id(amazon_boxes)),
        'reviews': len(get_reviews(country, amazon_boxes, Amazon.reviews)),
        'stars': len(get_stars(country, amazon_boxes, Amazon.stars)),
    }

    # The expected count does not change between iterations.
    expected = len(amazon_boxes)
    for label, count in trials.items():
        # assertEqual replaces the deprecated assertEquals alias, which
        # no longer exists on unittest.TestCase in Python 3.12+.
        self.assertEqual(
            expected, count,
            msg='getter "{}" returned {} items for {} boxes'.format(
                label, count, expected),
        )
from General.scrape_funcs import extract_soup, search_boxes, get_brute_info
# import sys
# sys.path.insert(1, '"web scraper"/Amazon')
from Amazon.data_filters import get_names, get_images, get_products_urls, get_price
from Amazon.data_filters import get_stars, get_reviews, amazon_products_id
from bs4 import BeautifulSoup

# Script: scrape one Amazon search page and collect per-product data
# (name, image, url, id) into the ``amazon_products`` dictionary.
#
# NOTE(review): ``Amazon`` is used below but is not imported in the visible
# part of this script; presumably it comes from General.scrape_data - confirm.

country = 'mx'
user_request = 'audifonos inalambricos'
amazon_url = Amazon.adapt_url(Amazon, country, user_request)

# All the HTML of the page
amazon_soup = extract_soup(amazon_url, 1, just_soup=True)

# HTML divided by products, and stored as elements of an array
amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)

# Entries are evaluated top to bottom, so the getters run in the order
# name, image, url, id.
amazon_products = {
    'name': get_names(amazon_boxes, Amazon.name_and_images),
    # Amazon's images source (link)
    'image': get_images(amazon_boxes, Amazon.name_and_images),
    'url': get_products_urls(amazon_boxes, Amazon.product_urls),
    # Just Amazon's products id. Is used as a url generator:
    # amazon's url + domain + "/dp/" + product_id
    'id': amazon_products_id(amazon_boxes),
}
from General.scrape_data import Mercado_Libre, Products
from General.general_funcs import cheapest, get_cheapest
from General.scrape_funcs import extract_soup, search_boxes, get_brute_info

# Script: fetch one Mercado Libre search page and print its HTML.

country = "mx"
user_request = "audifonos inalambricos"
ml_url = Mercado_Libre.adapt_url(Mercado_Libre, country, user_request)

# All the HTML of the page
ml_soup = extract_soup(ml_url, 1, just_soup=True)
print(ml_soup.prettify())

# Disabled product-extraction pipeline, kept for reference.
# NOTE(review): it calls get_names / get_images / get_products_urls /
# get_price, none of which are imported in the visible part of this script,
# and ``cheapest = cheapest(...)`` would rebind the imported ``cheapest``
# function to its own result - review before re-enabling.
#
# #HTML divided by products, and stored as elements of an array
# ml_boxes = search_boxes(ml_soup, Mercado_Libre.boxes)
#
# ml_products = {}
# ml_products['names'] = get_names(ml_boxes, Mercado_Libre.name_and_images)
# #Mercado_Libre's images source (link)
# ml_products['images'] = get_images(ml_boxes, Mercado_Libre.name_and_images)
# ml_products['urls'] = get_products_urls(ml_boxes, Mercado_Libre.product_urls)
# ml_products['prices'] = get_price(country, ml_boxes, Mercado_Libre.price)
# cheapest = cheapest(ml_products['prices'])
# cheapest_ml_product = get_cheapest(cheapest, ml_products)
# for key in cheapest_ml_product:
#     print(key, ':', cheapest_ml_product[key])