Example #1
0
 def run(self):
     """Queue every book URL found on the paginated listing at ``self.url``.

     The listing at ``self.url`` is fetched once so that
     ``listCrawler.getPageSize`` can report how many pages there are.
     Each page (numbered from 1, addressed as ``self.url + str(page)``)
     is then fetched, and every entry returned by
     ``listCrawler.getBookList`` is put on ``runner.booksQueue``.
     The method sleeps for one second before returning.
     """

     def load_soup(address):
         # The same timeout pair is used for every request made here.
         markup = requests.get(address, timeout=(10.0, 10.0)).text
         return BeautifulSoup(markup, "html.parser")

     # The first fetch is only used to find out how many pages exist.
     pageSize = listCrawler.getPageSize(load_soup(self.url))

     # Pages are numbered starting at 1, not 0.
     for page_number in range(1, pageSize + 1):
         listing = load_soup(self.url + str(page_number))
         for book_link in listCrawler.getBookList(listing):
             runner.booksQueue.put(book_link)

     # Single pause once the whole listing has been queued.
     time.sleep(1)
Example #2
0
from bs4 import BeautifulSoup
import requests

import bookRunner
from crawler import listCrawler
import urlList

__author__ = 'johnnytsai'

# Module-level state for the crawl loop below. Neither name is read or
# written in the visible part of this script.
# NOTE(review): presumably a running count of books saved so far -- confirm
# against the rest of the file.
now_save = 0
# NOTE(review): presumably collects the books/URLs that failed -- confirm
# against the rest of the file.
now_error = []

for l in urlList.booklist:
    html = requests.get(l, timeout=(10.0, 10.0)).text
    soup = BeautifulSoup(html, "html.parser")
    pageSize = listCrawler.getPageSize(soup)
    for index in range(pageSize):
        page = index+1
        html2 = requests.get(l + str(page), timeout=(10.0, 10.0)).text
        soup2 = BeautifulSoup(html2, "html.parser")
        #print(listCrawler.getBookList(soup2))
        for bookurl in listCrawler.getBookList(soup2):
            book = bookRunner.crawlerBook(bookurl, "/Users/johnnytsai/Desktop/books/image/")
            """
            print("ISBN: " + ("None" if book.isbn == None else book.isbn))
            print("Name: " + ("None" if book.name == None else book.name))
            print("Name2: " + ("None" if book.name2 == None else book.name2))
            print("Author: " + ("None" if book.author == None else book.author))
            print("Author2: " + ("None" if book.author2 == None else book.author2))
            print("Translator: " + ("None" if book.translator == None else book.translator))
            print("Publisher: " + ("None" if book.publisher == None else book.publisher))