Exemple #1
0
def main_menu():
    """Run the interactive prompt loop until the user enters 'q'.

    Options (input is stripped and lower-cased before comparison):

    * ``a`` -- pass ``page.books_list()`` and ``log_inducer`` to ``menu``.
    * ``b`` -- download one further catalogue page, print every book on it
      and advance to the next page. Starts at page 2; once page 50 has
      been shown, further ``b`` selections do nothing.
    * anything else -- print a hint and prompt again.
    """
    prompt = "Enter 'a' for page-1 content, 'b' for all pages, 'q' to quit: "
    next_page_nr = 2  # first catalogue page fetched by option 'b'

    while True:
        choice = input(prompt).strip().lower()
        if choice == 'q':
            break

        if choice == 'a':
            menu(page.books_list(), log_inducer)
        elif choice != 'b':
            print("Enter valid option.")
        elif next_page_nr <= 50:
            # Each 'b' shows exactly one page, then waits for the next choice.
            url = f'http://books.toscrape.com/catalogue/page-{next_page_nr}.html'
            listing = BooksPage(requests.get(url).content)
            for book in listing.books_list():
                print(book)
            next_page_nr += 1
Exemple #2
0
 async def _fetch_page(self, session, page_nr):
     """Download one page and return it wrapped in a ``BooksPage``.

     The URL comes from ``self._get_url(page_nr)``. The GET request and
     the read of the response text both run inside a single
     ``async_timeout.timeout(10)`` context.

     :param session: object whose ``get(url)`` is an async context manager
         yielding a response with an awaitable ``text()``.
     :param page_nr: page identifier forwarded to ``self._get_url``.
     :return: ``BooksPage`` built from the response text.
     """
     page_url = self._get_url(page_nr)
     self.logger.info(f"Downloading page content for {page_url}")
     # Timeout is entered first so it also covers opening the request.
     async with async_timeout.timeout(10), session.get(page_url) as response:
         return BooksPage(await response.text())
Exemple #3
0
from pages.books_page import BooksPage
from async_tools.async_request import get_multiple_pages

# Send log records to logs.txt, stamped as day-month-year hour:minute:second.
# (The previous datefmt '%d-%m%-%Y' was a typo: it dropped the separator
# between month and year and relied on the non-portable '%-Y' flag.)
logging.basicConfig(format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
                    datefmt='%d-%m-%Y %H:%M:%S',
                    level=logging.INFO,
                    filename='logs.txt')

logger = logging.getLogger('scraping')

system('clear')
logger.info('Loading books list...')
print('Loading books list...')

# Page 1 is fetched synchronously up front: it provides both the first
# batch of books and the total page count used to build the other URLs.
page_content = requests.get('http://books.toscrape.com/catalogue/page-1.html').content
page = BooksPage(page_content)

loop = asyncio.get_event_loop()

books = page.books

# Page 1's books are already in `books`, so request only pages 2..page_count.
# Starting the range at 0 would fetch page 1 again and duplicate its books.
urls = [f'http://books.toscrape.com/catalogue/page-{page_number + 1}.html'
        for page_number in range(1, page.page_count)]
start = time.time()
pages = loop.run_until_complete(get_multiple_pages(loop, *urls))
print(f'Total page requests took {time.time() - start}')

# Parse each downloaded page and append its books to the combined list.
# `page_content` and `page` are deliberately rebound here, as in the original.
for page_content in pages:
    logger.debug('Creating BooksPage from page content')
    page = BooksPage(page_content)
    books.extend(page.books)
Exemple #4
0
import requests

from pages.books_page import BooksPage
from menu import menu

import logger

# Logger for this module, obtained from the project's `logger` helper;
# main_menu passes it on to `menu`.
log_inducer = logger.get_logger(__name__)
log_inducer.info("Ready to read website content")

# Download the site's landing page once, when this module is executed, and
# parse it. main_menu's option 'a' reads its books from this `page` object.
content = requests.get('http://books.toscrape.com/').content
page = BooksPage(content)


def main_menu():
    """Prompt for a menu option and act on it until the user enters 'q'.

    'a' passes the books of the module-level ``page`` to ``menu``;
    'b' downloads catalogue page ``idx`` (starting at 2, capped at 50)
    and wraps it in a ``BooksPage``.

    NOTE(review): this copy of the function appears to be cut off after
    the 'b' branch builds ``current_page`` -- nothing visible here prints
    the page, advances ``idx``, handles invalid input or re-reads
    ``option``. Confirm against the complete source before relying on it.
    """
    # Input is normalised so ' A ' and 'a' are treated alike.
    option = input(
        "Enter 'a' for page-1 content, 'b' for all pages, 'q' to quit: "
    ).strip().lower()

    # Number of the next catalogue page that option 'b' will download.
    idx = 2
    while option != 'q':

        if option == 'a':
            menu(page.books_list(), log_inducer)

        elif option == 'b':
            # Pages above 50 are never requested.
            if idx <= 50:
                page_content = requests.get(
                    'http://books.toscrape.com/catalogue/page-' + str(idx) +
                    '.html').content
                current_page = BooksPage(page_content)
Exemple #5
0
import requests
from pages.books_page import BooksPage
from utils.config import Config
from utils.logger import logger

logger.info("Loading the books list...")

# Fetch and parse the start page; it supplies the first books and the
# page count that bounds the loop below.
web_page = requests.get(Config.URI_TO_SCRAPE).content
books_page = BooksPage(web_page)
books = books_page.books

# Visit catalogue pages 2 .. books_page.pages one after another and
# append each page's books to the combined list.
for page_nr in range(2, books_page.pages + 1):
    page_html = requests.get(
        f"{Config.URI_TO_SCRAPE}catalogue/page-{page_nr}.html"
    ).content
    books.extend(BooksPage(page_html).books)