Ejemplo n.º 1
0
 def run(self):
     """Crawl the paginated listing at ``self.url`` and enqueue its book URLs.

     The landing page is fetched first and handed to
     ``listCrawler.getPageSize`` to learn how many listing pages exist.
     Each page, numbered from 1 up to that count, is then fetched by
     appending the page number to ``self.url``; every entry returned by
     ``listCrawler.getBookList`` for that page is put on
     ``runner.booksQueue``.

     Every HTTP request uses ``timeout=(10.0, 10.0)``. Request errors are
     not caught here and propagate to the caller.
     """
     timeouts = (10.0, 10.0)

     landing = requests.get(self.url, timeout=timeouts)
     page_count = listCrawler.getPageSize(
         BeautifulSoup(landing.text, "html.parser")
     )

     # Listing pages are addressed 1-based: self.url + "1", self.url + "2", ...
     for page_number in range(1, page_count + 1):
         response = requests.get(self.url + str(page_number), timeout=timeouts)
         listing = BeautifulSoup(response.text, "html.parser")
         for book_url in listCrawler.getBookList(listing):
             runner.booksQueue.put(book_url)

     # NOTE(review): this pause happens once, after all pages were fetched,
     # not between page requests -- confirm that is the intended placement.
     time.sleep(1)
Ejemplo n.º 2
0
# Author credit for this script.
__author__ = 'johnnytsai'

# Module-level bookkeeping, initialised before the crawl starts.
# NOTE(review): presumably a running count of saved books -- confirm where it is updated.
now_save = 0
# NOTE(review): presumably collects items that failed during the crawl -- confirm where it is filled.
now_error = []

for l in urlList.booklist:
    html = requests.get(l, timeout=(10.0, 10.0)).text
    soup = BeautifulSoup(html, "html.parser")
    pageSize = listCrawler.getPageSize(soup)
    for index in range(pageSize):
        page = index+1
        html2 = requests.get(l + str(page), timeout=(10.0, 10.0)).text
        soup2 = BeautifulSoup(html2, "html.parser")
        #print(listCrawler.getBookList(soup2))
        for bookurl in listCrawler.getBookList(soup2):
            book = bookRunner.crawlerBook(bookurl, "/Users/johnnytsai/Desktop/books/image/")
            """
            print("ISBN: " + ("None" if book.isbn == None else book.isbn))
            print("Name: " + ("None" if book.name == None else book.name))
            print("Name2: " + ("None" if book.name2 == None else book.name2))
            print("Author: " + ("None" if book.author == None else book.author))
            print("Author2: " + ("None" if book.author2 == None else book.author2))
            print("Translator: " + ("None" if book.translator == None else book.translator))
            print("Publisher: " + ("None" if book.publisher == None else book.publisher))
            print("PublicationDate: " + ("None" if book.publicationDate == None else book.publicationDate))
            print("Language: " + ("None" if book.language == None else book.language))
            print("Collection: " + ("None" if book.collection == None else book.collection))
            print("Specification: " + ("None" if book.specification == None else book.specification))
            print("Publication: " + ("None" if book.publication == None else book.publication))
            print("Classification: " + ("None" if book.classification == None else book.classification))