Exemple #1
0
def get_reviews(country, boxes_array, info_tuple, test=False):
    """Extract the number of reviews of every product box.

    Args:
        country: Marketplace code. ``'mx'`` strips ``,`` and ``'br'`` strips
            ``.`` from the text before converting it to ``int``. Any other
            value leaves every entry as ``None``.
        boxes_array: Sequence of product boxes. Each box is handed to
            ``search_boxes`` together with ``info_tuple``.
        info_tuple: Selector description forwarded unchanged to
            ``search_boxes``.
        test: When truthy, print the resulting list before returning it.

    Returns:
        A list with one entry per box, in the same order: the review count
        as an ``int``, or ``None`` when nothing was found or the text could
        not be converted.
    """
    # Character removed from the text before the int conversion, per country.
    thousands_separators = {'mx': ',', 'br': '.'}
    separator = thousands_separators.get(country)

    reviews = [None] * len(boxes_array)

    for index, box in enumerate(boxes_array):
        # Remember that boxes are arrays
        matches = search_boxes(box, info_tuple)
        if not matches or separator is None:
            continue

        # Only the first match is relevant for a box.
        try:
            reviews[index] = int(matches[0].get_text().replace(separator, ''))
        except (AttributeError, TypeError, ValueError):
            # Best effort: an unparseable review count keeps its None
            # placeholder so the result stays aligned with boxes_array.
            pass

    if test:
        print(reviews)
    return reviews
Exemple #2
0
def get_price(country, boxes_array, info_tuple, test=False):
    """Extract the price of every product box as a float.

    Args:
        country: Marketplace code, used as a key of ``coins_dict``. For
            ``'mx'`` every ``,`` is removed before the float conversion; for
            ``'br'`` every ``.`` is removed and ``,`` becomes the decimal
            point. Other countries present in ``coins_dict`` leave every
            entry as ``None``.
        boxes_array: Sequence of product boxes. Each box is handed to
            ``search_boxes`` together with ``info_tuple``.
        info_tuple: Selector description forwarded unchanged to
            ``search_boxes``.
        test: When truthy, print the resulting list before returning it.

    Returns:
        A list with one entry per box, in the same order: the price as a
        ``float``, or ``None`` when nothing was found or the text could not
        be converted.

    Raises:
        KeyError: If ``country`` is not a key of ``coins_dict``.
    """
    price = [None] * len(boxes_array)

    # The value is used as the slice start that skips the currency prefix.
    # NOTE(review): this assumes coins_dict maps a country to an int offset
    # (or None); confirm against its definition. A str value would make every
    # price None.
    coin_symbol = coins_dict[country]

    for index, box in enumerate(boxes_array):
        # Remember that boxes are arrays
        matches = search_boxes(box, info_tuple)
        if not matches:
            continue

        try:
            amount = matches[0].get_text()[coin_symbol:]
            if country == 'mx':
                price[index] = float(amount.replace(',', ''))
            elif country == 'br':
                price[index] = float(
                    amount.replace('.', '').replace(',', '.'))
        except (AttributeError, TypeError, ValueError):
            # Best effort: an unparseable price keeps its None placeholder
            # so the result stays aligned with boxes_array.
            pass

    if test:
        print(price)
    return price
Exemple #3
0
    def test_get_brute_info_including_Nones(self):
        # Builds the search URL for a fixed query on the 'mx' store, downloads
        # the page and checks how many product boxes are extracted from it.
        country = 'mx'
        user_request = 'audifonos inalambricos'
        search_url = Amazon.adapt_url(Amazon, country, user_request)
        page_soup = extract_soup(search_url, 1, just_soup=True)

        # New test
        product_boxes = search_boxes(page_soup, Amazon.boxes)
        box_count = len(product_boxes)
        self.assertEqual(box_count, 60)
Exemple #4
0
    def test_get_brute_info_without_losses(self):
        # Downloads the result page for a fixed query on the 'mx' store and
        # splits it into product boxes.
        country = 'mx'
        user_request = 'audifonos inalambricos'
        search_url = Amazon.adapt_url(Amazon, country, user_request)
        page_soup = extract_soup(search_url, 1, just_soup=True)
        product_boxes = search_boxes(page_soup, Amazon.boxes)

        # New test: get_brute_info must return exactly one entry per box.
        raw_stars = get_brute_info(product_boxes, Amazon.stars)
        expected_count = len(product_boxes)
        self.assertEqual(expected_count, len(raw_stars))
Exemple #5
0
def get_images(boxes_array, info_tuple, test=False):
    """Extract the image source of every product box.

    Args:
        boxes_array: Sequence of product boxes. Each box is handed to
            ``search_boxes`` together with ``info_tuple``.
        info_tuple: Selector description forwarded unchanged to
            ``search_boxes``.
        test: When truthy, print the resulting list before returning it.

    Returns:
        A list with one entry per box, in the same order: the ``src``
        attribute of the ``img`` found in the first match, or ``None`` when
        there is no match, no ``img`` or no ``src``.
    """
    images = [None] * len(boxes_array)

    for index, box in enumerate(boxes_array):
        # Remember that boxes are arrays
        matches = search_boxes(box, info_tuple)
        if not matches:
            continue

        image_tag = matches[0].img
        # A match without an <img> child must not abort the whole run; the
        # entry simply keeps its None placeholder.
        if image_tag is not None:
            images[index] = image_tag.get('src')

    if test:
        print(images)
    return images
Exemple #6
0
def get_products_urls(boxes_array, info_tuple, test=False,
                      base_url='https://www.amazon.com.mx'):
    """Build the absolute product URL of every product box.

    Args:
        boxes_array: Sequence of product boxes. Each box is handed to
            ``search_boxes`` together with ``info_tuple``.
        info_tuple: Selector description forwarded unchanged to
            ``search_boxes``.
        test: When truthy, print the resulting list before returning it.
        base_url: Prefix prepended to each ``href``. It defaults to the
            Mexican store, which was previously hard-coded, so existing
            callers are unaffected.

    Returns:
        A list with one entry per box, in the same order: ``base_url``
        followed by the ``href`` of the first match, or ``None`` when there
        is no match or the match has no ``href``.
    """
    urls = [None] * len(boxes_array)

    for index, box in enumerate(boxes_array):
        # Remember that boxes are arrays
        matches = search_boxes(box, info_tuple)
        if not matches:
            continue

        source_url = matches[0].get('href')
        # A missing href would otherwise raise TypeError on concatenation.
        if source_url is not None:
            urls[index] = base_url + source_url

    if test:
        print(urls)
    return urls
Exemple #7
0
def get_stars(country, boxes_array, info_tuple, test=False):
    """Extract the star rating of every product box as a float.

    Only the first three characters of the matched text are parsed.

    Args:
        country: Marketplace code. ``'mx'`` parses the text as is; ``'br'``
            first turns ``,`` into the decimal point. Any other value leaves
            every entry as ``None``.
        boxes_array: Sequence of product boxes. Each box is handed to
            ``search_boxes`` together with ``info_tuple``.
        info_tuple: Selector description forwarded unchanged to
            ``search_boxes``.
        test: When truthy, print the resulting list before returning it.

    Returns:
        A list with one entry per box, in the same order: the rating as a
        ``float``, or ``None`` when nothing was found or the text could not
        be converted.
    """
    # Decimal mark used by each supported store.
    decimal_marks = {'mx': '.', 'br': ','}
    decimal_mark = decimal_marks.get(country)

    stars = [None] * len(boxes_array)

    for index, box in enumerate(boxes_array):
        # Remember that boxes are arrays
        matches = search_boxes(box, info_tuple)
        if not matches or decimal_mark is None:
            continue

        try:
            rating_text = matches[0].get_text()[:3]
            stars[index] = float(rating_text.replace(decimal_mark, '.'))
        except (AttributeError, TypeError, ValueError):
            # Best effort, like the other getters: one unparseable rating
            # keeps its None placeholder instead of aborting the whole run.
            pass

    if test:
        print(stars)
    return stars
Exemple #8
0
    def test_products_info_getters(self):
        # Every getter must return exactly one entry per product box, so
        # the per-field lists stay aligned with each other.
        user_request = 'audifonos inalambricos'
        country = 'mx'
        amazon_url = Amazon.adapt_url(Amazon, country, user_request)
        amazon_soup = extract_soup(amazon_url, 1, just_soup=True)
        amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)

        lengths = {
            'names': len(get_names(amazon_boxes, Amazon.name_and_images)),
            'images': len(get_images(amazon_boxes, Amazon.name_and_images)),
            'urls': len(get_products_urls(amazon_boxes, Amazon.product_urls)),
            'price': len(get_price(country, amazon_boxes, Amazon.price)),
            'ids': len(amazon_products_id(amazon_boxes)),
            'reviews': len(get_reviews(country, amazon_boxes,
                                       Amazon.reviews)),
            'stars': len(get_stars(country, amazon_boxes, Amazon.stars)),
        }

        expected = len(amazon_boxes)
        for getter, length in lengths.items():
            # subTest names the failing getter and keeps checking the rest.
            # assertEqual replaces the deprecated assertEquals alias, which
            # was removed in Python 3.12.
            with self.subTest(getter=getter):
                self.assertEqual(expected, length)
Exemple #9
0
# sys.path.insert(1, '"web scraper"/Amazon')

from Amazon.data_filters import get_names, get_images, get_products_urls, get_price
from Amazon.data_filters import get_stars, get_reviews, amazon_products_id

from bs4 import BeautifulSoup

# Script: fetch one Amazon search page and collect the per-product fields
# into the amazon_products dict, one list per field.
# NOTE(review): Amazon, extract_soup and search_boxes are not imported in the
# visible part of this file; presumably they are imported elsewhere. Confirm.
user_request = 'audifonos inalambricos'
country = 'mx'
amazon_url = Amazon.adapt_url(Amazon, country, user_request)

#All the HTML of the page
amazon_soup = extract_soup(amazon_url, 1, just_soup=True)

#HTML divided by products, and stored as elements of an array
amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)
# Maps a field name to the list returned by the matching getter.
amazon_products = {}

amazon_products['name'] = get_names(amazon_boxes, Amazon.name_and_images)

'''Amazon's images source (link)'''
# Uses the same selector (Amazon.name_and_images) as the names above.
amazon_products['image'] = get_images(amazon_boxes, Amazon.name_and_images)

amazon_products['url'] = get_products_urls(amazon_boxes, Amazon.product_urls)

'''Just Amazon's products id. Is used as a url generator:
amazon's url + domain + "/dp/" + product_id'''
amazon_products['id']= amazon_products_id(amazon_boxes)

'''Just stars as float'''
amazon_products['star'] = get_stars(country, amazon_boxes, Amazon.stars)