Example #1
import time
import logging

import requests

from pages.all_books_page import AllBooksPage

logging.basicConfig(
    format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
    datefmt='%d-%m-%Y %H:%M:%S',
    level=logging.INFO,
    filename='logs.txt',
    filemode='w')

logger = logging.getLogger('scraping')

print('Loading books list...')
logger.info('Loading books list...')

book_website = 'http://books.toscrape.com'  # main website link
logger.info(f'Requesting `{book_website}`')
page_content = requests.get(book_website).content

logger.debug('Creating AllBooksPage from page content.')
page = AllBooksPage(page_content)

books_ = []

start_time = time.time()  # start the timer so the total scraping time can be reported
logger.info(f'Going through all `{page.page_count}` pages of books...')
for page_num in range(page.page_count):
    page_start = time.time()
    url = f'http://books.toscrape.com/catalogue/page-{page_num+1}.html'
    logger.info(f'Requesting `{url}`')
    page_content = requests.get(url).content
    logger.debug('Creating AllBooksPage from page content.')
    page = AllBooksPage(page_content)
    print(f'`{url}` took `{time.time() - page_start}` seconds.')  # time taken per page
    books_.extend(page.books)

print(f'All pages took `{time.time() - start_time}` seconds in total.')  # uses the timer started above
Example #2
import asyncio
import logging
import time

import aiohttp
import async_timeout
import requests

from pages.all_books_page import AllBooksPage

logging.basicConfig(
    format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
    datefmt='%d-%m-%Y %H:%M:%S',
    level=logging.DEBUG,
    filename='logs.txt')

logger = logging.getLogger('scraping')

logger.info('Loading books list...')

page_content = requests.get('http://books.toscrape.com').content

page = AllBooksPage(page_content)  # builds page object

loop = asyncio.get_event_loop()

books = page.books  # books parsed from the main page via the AllBooksPage.books property


async def fetch_page(session, url):
    page_start = time.time()
    # async with aiohttp.ClientSession() as session:
    async with async_timeout.timeout(10):  # abort the request if it takes too long
        async with session.get(url) as response:
            print(f'page loaded in {time.time() - page_start}')
            # return response.status
            return await response.text()
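
The snippet stops here: the coroutine is defined but never scheduled on the loop created earlier. A minimal sketch of one way to drive it, assuming AllBooksPage exposes the page_count property seen in Example #1 (fetch_all_pages is a hypothetical helper, not part of the original example):

async def fetch_all_pages(page_count):
    # One shared session; fetch catalogue pages 2..page_count concurrently
    # (page 1 was already parsed into `books` above).
    async with aiohttp.ClientSession() as session:
        urls = [f'http://books.toscrape.com/catalogue/page-{n}.html'
                for n in range(2, page_count + 1)]
        return await asyncio.gather(*(fetch_page(session, url) for url in urls))

pages_html = loop.run_until_complete(fetch_all_pages(page.page_count))
for html in pages_html:
    books.extend(AllBooksPage(html).books)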
Example #3
import requests

from pages.all_books_page import AllBooksPage

page_content = requests.get('http://books.toscrape.com').content
books = AllBooksPage(page_content).books
Example #4
import logging
import requests
from pages.all_books_page import AllBooksPage

BOOKS_URL = 'http://books.toscrape.com/catalogue/page-{}.html'  # assumed template
logger = logging.getLogger('scraping')

def scrape_books_page(page_number=1) -> AllBooksPage:
    logger.info(f'Scraping book page {page_number}')
    url = BOOKS_URL.format(page_number)
    page_content = requests.get(url).text
    page = AllBooksPage(page_content)
    return page
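
A possible way to walk the whole catalogue with this function, again assuming the page_count property from Example #1:

first_page = scrape_books_page()
books = list(first_page.books)
for page_number in range(2, first_page.page_count + 1):
    books.extend(scrape_books_page(page_number).books)
print(f'Scraped {len(books)} books in total.')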
Example #5
import requests
from locators.all_books_page import AllBooksPageLocators
from pages.all_books_page import AllBooksPage

book_content = requests.get('http://books.toscrape.com/').content
book_tag = AllBooksPage(book_content)

for book in book_tag.books:
    print(book)
Example #6
import requests
import logging
from pages.all_books_page import AllBooksPage

logging.basicConfig(format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
                    datefmt='%d-%m-%Y %H:%M:%S',
                    level=logging.DEBUG,
                    filename='logs.txt')

logger = logging.getLogger('scraping')
logger.info('Loading book lists...')

page_content = requests.get('http://books.toscrape.com').content
page = AllBooksPage(page_content)
books = page.books

logger.info('Adding books from pages 2 to 50 to the list')
for page_number in range(1, page.page_num):
    url = f'http://books.toscrape.com/catalogue/page-{page_number+1}.html'
    page_content = requests.get(url).content
    new_page = AllBooksPage(page_content)
    books.extend(new_page.books)