def main_menu():
    """Run the interactive console menu until the user enters 'q'.

    Options (input is stripped and lower-cased before comparison):
      * 'a' - hand the books of the module-level ``page`` to ``menu``.
      * 'b' - download the next catalogue page, print each of its books and
        advance the page counter; one page per selection, starting at 2.
      * 'q' - leave the menu.
    Any other input prints a hint and prompts again.
    """
    prompt = "Enter 'a' for page-1 content, 'b' for all pages, 'q' to quit: "
    last_page_nr = 50
    next_page_nr = 2

    while True:
        choice = input(prompt).strip().lower()
        if choice == 'q':
            return

        if choice == 'a':
            menu(page.books_list(), log_inducer)
        elif choice != 'b':
            print("Enter valid option.")
        elif next_page_nr <= last_page_nr:
            # NOTE(review): the prompt says "all pages", but each 'b' fetches
            # exactly one page; past page 50 the option silently does nothing.
            url = ('http://books.toscrape.com/catalogue/page-'
                   + str(next_page_nr) + '.html')
            html = requests.get(url).content
            for book in BooksPage(html).books_list():
                print(book)
            # Only advance once the page was fetched and printed successfully.
            next_page_nr += 1
async def _fetch_page(self, session, page_nr):
    """Download one page and return it parsed as a ``BooksPage``.

    Args:
        session: Object whose ``get(url)`` is usable as an async context
            manager yielding a response with an awaitable ``text()``
            (presumably an aiohttp ``ClientSession`` - confirm with caller).
        page_nr: Page identifier passed to ``self._get_url`` to build the URL.

    Returns:
        A ``BooksPage`` built from the response body text.
    """
    page_url = self._get_url(page_nr)
    self.logger.info(f"Downloading page content for {page_url}")
    # The timeout context is entered first, so the 10-second limit covers
    # both issuing the request and reading the body.
    async with async_timeout.timeout(10), session.get(page_url) as response:
        return BooksPage(await response.text())
from pages.books_page import BooksPage
from async_tools.async_request import get_multiple_pages

# Send log records to logs.txt with a timestamp, level and source location.
logging.basicConfig(
    format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
    # Day-month-year. The previous value '%d-%m%-%Y' contained a stray '%',
    # which is not a valid year directive, so the year was never written.
    datefmt='%d-%m-%Y %H:%M:%S',
    level=logging.INFO,
    filename='logs.txt',
)
logger = logging.getLogger('scraping')

system('clear')
logger.info('Loading books list...')
print('Loading books list...')

# Page 1 is fetched synchronously: its parsed form supplies both the first
# batch of books and the page count used to build the remaining URLs.
page_content = requests.get('http://books.toscrape.com/catalogue/page-1.html').content
page = BooksPage(page_content)

loop = asyncio.get_event_loop()
books = page.books

# Page 1's books are already in `books`, so request pages 2..page_count only.
# Starting the range at 0 would download page 1 again and duplicate its books.
urls = [
    f'http://books.toscrape.com/catalogue/page-{page_number + 1}.html'
    for page_number in range(1, page.page_count)
]

start = time.time()
pages = loop.run_until_complete(get_multiple_pages(loop, *urls))
print(f'Total page requests took {time.time() - start}')

# Parse every downloaded page and append its books to the combined list.
for page_content in pages:
    logger.debug('Creating BooksPage from page content')
    page = BooksPage(page_content)
    books.extend(page.books)
import requests from pages.books_page import BooksPage from menu import menu import logger log_inducer = logger.get_logger(__name__) log_inducer.info("Ready to read website content") content = requests.get('http://books.toscrape.com/').content page = BooksPage(content) def main_menu(): option = input( "Enter 'a' for page-1 content, 'b' for all pages, 'q' to quit: " ).strip().lower() idx = 2 while option != 'q': if option == 'a': menu(page.books_list(), log_inducer) elif option == 'b': if idx <= 50: page_content = requests.get( 'http://books.toscrape.com/catalogue/page-' + str(idx) + '.html').content current_page = BooksPage(page_content)
import requests

from pages.books_page import BooksPage
from utils.config import Config
from utils.logger import logger

logger.info("Loading the books list...")

# The landing page supplies the first batch of books and, through
# `books_page.pages`, the upper bound for the loop below (presumably the
# total number of catalogue pages - confirm against BooksPage).
web_page = requests.get(Config.URI_TO_SCRAPE).content
books_page = BooksPage(web_page)
books = books_page.books

# The index starts at 1 and the URL uses index + 1, so page 2 is the first
# page requested here and the landing page's books are not fetched again.
for page_num in range(1, books_page.pages):
    next_page_url = f"{Config.URI_TO_SCRAPE}catalogue/page-{page_num + 1}.html"
    books.extend(BooksPage(requests.get(next_page_url).content).books)