Esempio n. 1
0
    def collect(self):
        """ Connect to the home-page and grab the information

        Passes ``self.site_url`` to ``FileIO.connect_with_bs4``, keeps the
        result in ``self._soup`` and then fills ``num_books``, ``links`` and
        ``categories`` from the matching private ``__scrap_*`` helpers, in
        that order.
        """

        # Presumably every __scrap_* helper reads self._soup, which is why it
        # is assigned before any of them runs -- confirm against the helpers.
        self._soup = FileIO.connect_with_bs4(self.site_url)

        self.num_books = self.__scrap_num_books()
        self.links = self.__scrap_links()
        self.categories = self.__scrap_categories()
Esempio n. 2
0
    def collect(self):
        """ Connect to the category page and grab the information

        Passes ``self.category_url`` to ``FileIO.connect_with_bs4``, keeps the
        result in ``self._soup`` and then fills ``name``, ``num_books``,
        ``links`` and ``books`` from the matching private ``__scrap_*``
        helpers, in that order.
        """

        # Presumably every __scrap_* helper reads self._soup, which is why it
        # is assigned before any of them runs -- confirm against the helpers.
        self._soup = FileIO.connect_with_bs4(self.category_url)

        self.name = self.__scrap_name()
        self.num_books = self.__scrap_num_books()
        # NOTE(review): a link scraper that paginates until it has
        # self.num_books entries needs num_books assigned first; keep this
        # order unless the helpers are confirmed to be independent.
        self.links = self.__scrap_links()
        self.books = self.__scrap_books()
Esempio n. 3
0
    def collect(self):
        """ Connect to the product page and grab the information

        Passes ``self.product_page_url`` to ``FileIO.connect_with_bs4``, keeps
        the result in ``self._soup`` and then fills one attribute per private
        ``__scrap_*`` helper: UPC, title, prices including and excluding tax,
        number available, description, category, review rating and image URL.
        """

        # Presumably every __scrap_* helper reads self._soup, which is why it
        # is assigned before any of them runs -- confirm against the helpers.
        self._soup = FileIO.connect_with_bs4(self.product_page_url)

        self.universal_product_code = self.__scrap_upc()
        self.title = self.__scrap_title()
        self.price_including_tax = self.__scrap_price_inc_tax()
        self.price_excluding_tax = self.__scrap_price_exc_tax()
        self.number_available = self.__scrap_number_available()
        self.product_description = self.__scrap_product_description()
        self.category = self.__scrap_category()
        self.review_rating = self.__scrap_review_rating()
        self.image_url = self.__scrap_image_url()
Esempio n. 4
0
    def __scrap_links(self):
        """ Collect the (absolute URL, title) pair of every listed book

        Starts with the anchors found in ``self._soup``. While fewer than
        ``self.num_books`` anchors have been gathered, the following pages
        (``page-2.html``, ``page-3.html``, ... resolved against
        ``self.category_url``) are fetched with ``FileIO.connect_with_bs4``
        and their anchors appended. Pagination also stops as soon as a page
        contributes no anchors, so a ``num_books`` larger than what the pages
        actually list cannot keep the loop running forever.

        Returns:
            A list of ``(url, title)`` tuples, where ``url`` is the anchor's
            ``href`` joined onto ``self.category_url`` and ``title`` is the
            anchor's ``title`` attribute.

        Raises:
            Exception: if anything fails while collecting the links; the
                underlying error is chained as ``__cause__``.
        """
        def get_links(soup):
            # <a> elements that carry a title attribute inside a <section>.
            return soup.select('section a[title]')

        try:
            # Copy into a plain list so extending it never mutates the
            # result object returned by select().
            links = list(get_links(self._soup))

            page = 2
            while len(links) < self.num_books:
                page_url = urljoin(self.category_url,
                                   'page-{}.html'.format(page))
                found = get_links(FileIO.connect_with_bs4(page_url))
                if not found:
                    # Nothing new on this page: further requests would not
                    # get us closer to num_books, so stop paginating.
                    break
                links.extend(found)
                page += 1

            return [(urljoin(self.category_url, x.attrs['href']),
                     x.attrs['title']) for x in links]
        except Exception as err:
            # Report the URL this method actually works with (category_url)
            # and chain the cause so the real failure stays visible.
            raise Exception(
                f"Can't find the Book links ::\n{self.category_url}") from err
Esempio n. 5
0
def test_connect_with_bs4_ERROR():
    """ connect_with_bs4 must raise when given a made-up address """
    bogus_url = 'http://www.xxxfakexxx.xxx'

    with pytest.raises(Exception):
        FileIO.connect_with_bs4(bogus_url)
Esempio n. 6
0
def test_connect_with_bs4_TYPE():
    """ connect_with_bs4 must hand back a BeautifulSoup object """
    url = 'http://books.toscrape.com'
    # isinstance is the idiomatic type check and keeps passing should the
    # helper ever return a BeautifulSoup subclass.
    assert isinstance(FileIO.connect_with_bs4(url), BeautifulSoup)