Example #1
0
    def test_there_is_soup(self):
        """extract_soup must return something other than None for Amazon.

        The URL is built by Amazon.adapt_url from a sample query and the
        'mx' country code.
        """
        query, region = 'audifonos inalambricos', 'mx'

        # adapt_url is invoked on the class, with the class itself passed
        # explicitly as the first argument.
        search_url = Amazon.adapt_url(Amazon, region, query)

        self.assertIsNotNone(extract_soup(search_url, 1, just_soup=True))
Example #2
0
    def test_conection_status(self):
        """extract_soup with just_status=True must report 200 for Amazon."""
        query, region = 'audifonos inalambricos', 'mx'

        # adapt_url is invoked on the class, with the class itself passed
        # explicitly as the first argument.
        search_url = Amazon.adapt_url(Amazon, region, query)
        status = extract_soup(search_url, 0, just_status=True)

        self.assertEqual(status, 200)
    def test_there_is_soup(self):
        """extract_soup must return something other than None for Mercado Libre.

        The URL is built by Mercado_Libre.adapt_url from a sample query and
        the 'mx' country code.
        """
        query, region = 'audifonos inalambricos', 'mx'

        # adapt_url is invoked on the class, with the class itself passed
        # explicitly as the first argument.
        search_url = Mercado_Libre.adapt_url(Mercado_Libre, region, query)

        self.assertIsNotNone(extract_soup(search_url, 1, just_soup=True))
    def test_mercado_libre_conection_status(self):
        """extract_soup with just_status=True must report 200 for Mercado Libre."""
        query, region = 'audifonos inalambricos', 'mx'

        # adapt_url is invoked on the class, with the class itself passed
        # explicitly as the first argument.
        search_url = Mercado_Libre.adapt_url(Mercado_Libre, region, query)
        status = extract_soup(search_url, 0, just_status=True)

        self.assertEqual(status, 200)
Example #5
0
    def test_get_brute_info_including_Nones(self):
        """search_boxes must return exactly 60 boxes for the sample query."""
        query, region = 'audifonos inalambricos', 'mx'
        search_url = Amazon.adapt_url(Amazon, region, query)
        soup = extract_soup(search_url, 1, just_soup=True)

        # Count the product boxes located with the Amazon.boxes selector.
        box_count = len(search_boxes(soup, Amazon.boxes))

        self.assertEqual(box_count, 60)
Example #6
0
    def test_get_brute_info_without_losses(self):
        """get_brute_info must return as many entries as there are boxes."""
        query, region = 'audifonos inalambricos', 'mx'
        search_url = Amazon.adapt_url(Amazon, region, query)
        soup = extract_soup(search_url, 1, just_soup=True)
        boxes = search_boxes(soup, Amazon.boxes)

        # Raw star entries extracted from the boxes with Amazon.stars.
        raw_stars = get_brute_info(boxes, Amazon.stars)

        self.assertEqual(len(boxes), len(raw_stars))
Example #7
0
    def test_products_info_getters(self):
        """Check that every Amazon getter returns one entry per product box.

        Builds the URL for the sample query, extracts the soup and the
        product boxes, then compares the length of each getter's result
        with the number of boxes.
        """
        user_request = 'audifonos inalambricos'
        country = 'mx'
        amazon_url = Amazon.adapt_url(Amazon, country, user_request)
        amazon_soup = extract_soup(amazon_url, 1, just_soup=True)
        amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)
        expected = len(amazon_boxes)

        # Only the length of each getter's result is needed; the getters are
        # called in the same order as before.
        lengths = {
            'names': len(get_names(amazon_boxes, Amazon.name_and_images)),
            'images': len(get_images(amazon_boxes, Amazon.name_and_images)),
            'urls': len(get_products_urls(amazon_boxes, Amazon.product_urls)),
            'price': len(get_price(country, amazon_boxes, Amazon.price)),
            'ids': len(amazon_products_id(amazon_boxes)),
            'reviews': len(get_reviews(country, amazon_boxes, Amazon.reviews)),
            'stars': len(get_stars(country, amazon_boxes, Amazon.stars)),
        }

        for label, length in lengths.items():
            # subTest names the getter in the failure report and lets the
            # remaining checks run instead of stopping at the first mismatch.
            with self.subTest(getter=label):
                # assertEqual replaces the deprecated assertEquals alias,
                # which was removed in Python 3.12.
                self.assertEqual(expected, length)
Example #8
0
from General.scrape_funcs import extract_soup, search_boxes, get_brute_info

# import sys
# sys.path.insert(1, '"web scraper"/Amazon')

from Amazon.data_filters import get_names, get_images, get_products_urls, get_price
from Amazon.data_filters import get_stars, get_reviews, amazon_products_id

from bs4 import BeautifulSoup

# Sample query and country code used to exercise the Amazon helpers.
user_request = 'audifonos inalambricos'
country = 'mx'
amazon_url = Amazon.adapt_url(Amazon, country, user_request)

# All the HTML of the page.
amazon_soup = extract_soup(amazon_url, 1, just_soup=True)

# HTML divided by products, stored as the elements of an array.
amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)

# One entry per extracted field; the getters run in the listed order.
amazon_products = {
    'name': get_names(amazon_boxes, Amazon.name_and_images),
    # Amazon's image sources (links).
    'image': get_images(amazon_boxes, Amazon.name_and_images),
    'url': get_products_urls(amazon_boxes, Amazon.product_urls),
    # Just Amazon's product ids. Usable as a URL generator:
    # amazon's url + domain + "/dp/" + product_id
    'id': amazon_products_id(amazon_boxes),
}
from General.scrape_data import Mercado_Libre, Products
from General.general_funcs import cheapest, get_cheapest
from General.scrape_funcs import extract_soup, search_boxes, get_brute_info

# Sample query and country code used to exercise the Mercado Libre helpers.
user_request = 'audifonos inalambricos'
country = 'mx'

# adapt_url is invoked on the class, with the class itself passed explicitly
# as the first argument.
ml_url = Mercado_Libre.adapt_url(Mercado_Libre, country, user_request)

# All the HTML of the page, dumped in prettified form for inspection.
ml_soup = extract_soup(ml_url, 1, just_soup=True)
_pretty_html = ml_soup.prettify()
print(_pretty_html)

# #HTML divided by products, and stored as elements of an array
# ml_boxes = search_boxes(ml_soup, Mercado_Libre.boxes)
# # ml_products = {}

# ml_products['names'] = get_names(ml_boxes, Mercado_Libre.name_and_images)

# #Mercado_Libre's images source (link)
# ml_products['images'] = get_images(ml_boxes, Mercado_Libre.name_and_images)

# ml_products['urls'] = get_products_urls(ml_boxes, Mercado_Libre.product_urls)

# ml_products['prices'] = get_price(country, ml_boxes, Mercado_Libre.price)

# cheapest = cheapest(ml_products['prices'])
# cheapest_ml_product = get_cheapest(cheapest, ml_products)
# for key in cheapest_ml_product:
#     print(key, ':', cheapest_ml_product[key])