Beispiel #1
0
    def test_products_info_getters(self):
        """Each eBay getter must return as many items as there are boxes.

        Builds the search URL for a fixed query, extracts the soup and the
        product boxes, then compares the length of every getter's result
        with the number of boxes.
        """
        query = 'audifonos inalambricos'
        country = 'mx'
        search_url = Ebay.adapt_url(Ebay, query, country)
        soup = extract_soup(search_url, 1, just_soup=True)
        boxes = search_boxes(soup, Ebay.boxes)

        # NOTE: the price getter is not checked here (it was disabled in
        # this test).
        counts = {
            'ebay_names': len(get_names(boxes, Ebay.name_and_images)),
            'ebay_images': len(get_images(boxes, Ebay)),
            'ebay_urls': len(get_products_urls(boxes, Ebay)),
        }

        expected = len(boxes)
        for found in counts.values():
            self.assertEqual(expected, found)
Beispiel #2
0
 def test_products_info_getters(self):
     """Each getter must return one item per box, for every page class.

     Iterates ``self.Pages``; for each one it builds the search URL for a
     fixed query, extracts the soup and the product boxes, and compares
     the length of every getter's result with the number of boxes.
     """
     query = 'audifonos inalambricos'
     country = 'mx'
     for page_cls in self.Pages:
         url = page_cls.adapt_url(page_cls, query, country)
         soup = extract_soup(url, 1, just_soup=True)
         boxes = search_boxes(soup, page_cls.boxes)

         # All getters run before the first assertion, in this order:
         # names, images, urls, prices.
         counts = (
             len(get_names(boxes, page_cls)),
             len(get_images(boxes, page_cls)),
             len(get_products_urls(boxes, page_cls)),
             len(get_price(country, boxes, page_cls)),
         )
         for count in counts:
             self.assertEqual(len(boxes), count)
Beispiel #3
0
    def test_products_info_getters(self):
        """Each Mercado Libre getter must return one item per product box.

        Builds the search URL for a fixed query, extracts the soup and the
        product boxes, then compares the length of every getter's result
        with the number of boxes.
        """
        query = 'audifonos inalambricos'
        country = 'mx'
        search_url = Mercado_Libre.adapt_url(Mercado_Libre, query, country)
        soup = extract_soup(search_url, 1, just_soup=True)
        boxes = search_boxes(soup, Mercado_Libre.boxes)

        names = get_names(boxes, Mercado_Libre.name_and_images)
        images = get_images(boxes, Mercado_Libre)
        urls = get_products_urls(boxes, Mercado_Libre)
        prices = get_price(country, boxes, Mercado_Libre.price)

        counts = {
            'ml_names': len(names),
            'ml_images': len(images),
            'ml_urls': len(urls),
            'ml_price': len(prices),
        }

        expected = len(boxes)
        for found in counts.values():
            self.assertEqual(expected, found)
Beispiel #4
0
def get_cheapest(cheapest_idx,
                 products,
                 cheapest_price=None,
                 country=None,
                 Page=None):
    """Build a dictionary describing the product at ``cheapest_idx``.

    ``products`` can be given in two forms:

    * a ``bs4.element.ResultSet`` with the product boxes: name, image and
      url are extracted from the boxes with the ``Page`` selectors and the
      price is taken from ``cheapest_price``;
    * a dict (or dict subclass) with the data preloaded: every value is
      indexed at ``cheapest_idx`` and stored under the same key.

    Args:
        cheapest_idx: Position of the product to extract.
        products: ``ResultSet`` of boxes or a dict of indexable values.
        cheapest_price: Price stored under ``'price'`` (boxes form only).
            It is not validated, so the result may hold ``None``.
        country: Must be truthy in the boxes form; it is not otherwise
            used by this function.
        Page: Page object providing ``name_and_images`` and
            ``product_urls`` (boxes form only).

    Returns:
        dict: ``name``/``image``/``url``/``price`` in the boxes form, or the
        same keys as ``products`` in the dict form.

    Raises:
        ValueError: If ``country`` and/or ``Page`` are missing in the boxes
            form, or if ``products`` has an unsupported type.
    """
    # Compared by name so this function does not need its own bs4 import.
    if str(type(products)) == "<class 'bs4.element.ResultSet'>":
        # The most specific diagnosis comes first; previously the combined
        # message could never be reached.
        if not country and not Page:
            raise ValueError(
                "Missing Page and country values.\n"
                "            You can use this function with a dictionary with all the data preloaded as well"
            )
        if not country:
            raise ValueError("Missing country value")
        if not Page:
            raise ValueError("Missing Page object")

        return {
            'name': get_names(products,
                              Page.name_and_images,
                              position=cheapest_idx),
            'image': get_images(products, Page, position=cheapest_idx),
            'url': get_products_urls(products,
                                     Page.product_urls,
                                     position=cheapest_idx),
            'price': cheapest_price,
        }

    if isinstance(products, dict):
        return {key: values[cheapest_idx] for key, values in products.items()}

    error_in_product_type = f"Products type must be a dict or a bs4.element.ResultSet with the boxes. Recived {type(products)}"
    raise ValueError(error_in_product_type)
Beispiel #5
0
    def test_products_info_getters(self):
        """Each Amazon getter must return one item per product box.

        Builds the search URL for a fixed query, extracts the soup and the
        product boxes, then compares the length of every getter's result
        (names, images, urls, prices, reviews, stars) with the number of
        boxes.
        """
        query = 'audifonos inalambricos'
        country = 'mx'
        search_url = Amazon.adapt_url(Amazon, query, country)
        soup = extract_soup(search_url, 1, just_soup=True)
        boxes = search_boxes(soup, Amazon.boxes)

        # All getters run before the first assertion, in this order.
        counts = (
            len(get_names(boxes, Amazon.name_and_images)),
            len(get_images(boxes, Amazon)),
            len(get_products_urls(boxes, Amazon)),
            len(get_price(country, boxes, Amazon.price)),
            len(get_reviews(country, boxes, Amazon.reviews)),
            len(get_stars(country, boxes, Amazon.stars)),
        )

        for count in counts:
            self.assertEqual(len(boxes), count)
Beispiel #6
0
def request_products(user_request, Page, header, home=False, country='mx'):
    """Request a store's search page and collect the products' info.

    Args:
        user_request: Text the user is searching for.
        Page: Page object; ``adapt_url``, ``boxes``, ``name`` and ``index``
            are read from it.
        header: Passed to ``extract_soup`` on every request.
        home: Forwarded to ``get_price``.
        country: Country code used for the URL and the prices.

    Returns:
        dict: On status 200, the keys ``names``, ``images``, ``urls``,
        ``prices`` and ``status``. On any other final status, the keys
        ``store``, ``idx``, ``product`` and ``status`` plus empty lists for
        ``names``, ``images``, ``urls`` and ``prices``.
    """
    page_url = Page.adapt_url(Page, user_request, country)

    # All the HTML of the page
    page_soup, status = extract_soup(page_url, header)

    # Wait until the info is received or the request is denied. The retry
    # happens BEFORE the status is evaluated, so a successful retry is
    # processed instead of falling through and returning None, and it sends
    # the same header as the first request.
    # NOTE: there is no retry cap; a server that always answers 503 keeps
    # this loop running.
    while status == 503:
        time.sleep(1)
        page_soup, status = extract_soup(page_url, header)

    if status == 200:
        # HTML divided by products, and stored as elements of an array
        page_boxes = search_boxes(page_soup, Page.boxes)

        # Obtain the info of the products
        return {
            'names': get_names(page_boxes, Page),
            'images': get_images(page_boxes, Page),
            'urls': get_products_urls(page_boxes, Page),
            'prices': get_price(country, page_boxes, Page, home),
            'status': status,
        }

    # Empty lists (not None) tell the rest of the script that nothing will
    # be uploaded; a None would be read as a product box without info,
    # something that occurs with Amazon.
    return {
        'store': Page.name,
        'idx': Page.index,
        'product': user_request,
        'names': [],
        'images': [],
        'urls': [],
        'prices': [],
        'status': status,
    }
if __name__ == "__main__":
    # Manual run: collect the Amazon product data for a fixed query.
    user_request = 'audifonos inalambricos'
    country = 'mx'
    amazon_url = Amazon.adapt_url(Amazon, user_request, country)

    # All the HTML of the page
    amazon_soup = extract_soup(amazon_url, 1, just_soup=True)

    # HTML divided by products, and stored as elements of an array
    amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)

    # Real comments replace the former no-op string statements; the getters
    # are still evaluated in the same order.
    amazon_products = {
        'name': get_names(amazon_boxes, Amazon.name_and_images),
        # Amazon's images source (link)
        'image': get_images(amazon_boxes, Amazon),
        'url': get_products_urls(amazon_boxes, Amazon),
        # Just stars as float (per the original note)
        'star': get_stars(country, amazon_boxes, Amazon.stars),
        # Just number of reviews as int (per the original note)
        'review': get_reviews(country, amazon_boxes, Amazon.reviews),
        'price': get_price(country, amazon_boxes, Amazon.price),
    }
    return cheapest_product_dictionary


if __name__ == "__main__":
    # Manual run: print the cheapest product found on eBay and on
    # Mercado Libre for a fixed query.
    user_request = 'audifonos inalambricos'
    country = 'mx'

    # ---- eBay: cheapest product from preloaded data (dict form) ----
    ebay_url = Ebay.adapt_url(Ebay, user_request, country)

    # All the HTML of the page
    ebay_soup = extract_soup(ebay_url, 1, just_soup=True)

    # HTML divided by products, and stored as elements of an array
    ebay_boxes = search_boxes(ebay_soup, Ebay.boxes)

    ebay_products = {
        'names': get_names(ebay_boxes, Ebay.name_and_images),
        # eBay's images source (link)
        'images': get_images(ebay_boxes, Ebay),
        'urls': get_products_urls(ebay_boxes, Ebay),
        'prices': get_price(country, ebay_boxes, Ebay.price),
    }

    cheapest_idx = cheapest(ebay_products['prices'])
    cheapest_ebay_product2 = get_cheapest(cheapest_idx, ebay_products)

    print('\nTest ONE:')
    for key in cheapest_ebay_product2:
        print(key, ':', cheapest_ebay_product2[key])

    # ---- Mercado Libre ----
    # ml_url was used without ever being defined (NameError); build it the
    # same way as the other pages' URLs.
    ml_url = Mercado_Libre.adapt_url(Mercado_Libre, user_request, country)
    ml_soup = extract_soup(ml_url, 0, just_soup=True)

    # HTML divided by products, and stored as elements of an array
    ml_boxes = search_boxes(ml_soup, Mercado_Libre.boxes)

    # Cheapest product straight from the boxes (ResultSet form)
    print('Test ONE:')
    meli_prices = get_price(country, ml_boxes, Mercado_Libre.price)

    meli_cheapest_idx, meli_cheapest_price = cheapest(meli_prices,
                                                      position_and_price=True)
    cheapest_ml_product_1 = get_cheapest(meli_cheapest_idx, ml_boxes,
                                         meli_cheapest_price, country,
                                         Mercado_Libre)

    for key in cheapest_ml_product_1:
        print(key, ':', cheapest_ml_product_1[key])

    # Cheapest product from preloaded data (dict form)
    ml_products = {
        'names': get_names(ml_boxes, Mercado_Libre.name_and_images),
        # Mercado Libre's images source (link)
        'images': get_images(ml_boxes, Mercado_Libre),
        'urls': get_products_urls(ml_boxes, Mercado_Libre),
        'prices': get_price(country, ml_boxes, Mercado_Libre.price),
    }

    cheapest_idx = cheapest(ml_products['prices'])
    cheapest_ml_product2 = get_cheapest(cheapest_idx, ml_products)

    print('\n\nTest TWO:')
    for key in cheapest_ml_product2:
        print(key, ':', cheapest_ml_product2[key])