Example #1
0
import queue
import threading

import pymysql

from spider import Spider

# Preset page count; per the original note this corresponds to 258 * 20
# comics in total.
total = 258

# SQL statement templates for the comic_info table.
# NOTE(review): the placeholders are wrapped in quotes inside the SQL text,
# which suggests callers fill them with Python "%" formatting rather than
# driver-side parameters -- confirm, and check that the values are trusted.
sql_insert = (
    "insert into comic_info values "
    """('%d',"%s","%s","%s","%s","%s","%s", """
    "NULL, NULL, NULL, NULL, NULL, NULL)"
)
# The trailing space at the end of this template is part of the original text.
sql_select = (
    "select cslug from comic_info "
    """where cid = '%d' and clastname != "%s" """
)
sql_update = (
    "update comic_info "
    """set clastname = "%s", cserialise = '%d' """
    "where cid = '%d'"
)
sql_update2 = (
    "update comic_info set "
    'ctype = "%s", ccategory = "%s", carea = "%s", '
    'cupdate = "%s", cchapters = "%s", cchapterurl = "%s" '
    "where cslug = '%s'"
)

# Names "Thread-1" .. "Thread-4", one per entry.
threadList = ["Thread-" + str(index) for index in range(1, 5)]

# Spider instance; used here to call comic_search.
sp = Spider()
# Try to fetch the real page count. On failure the previously assigned
# `total` is kept and only a message is printed.
try:
    # pageCount + 1 so that range(1, total) covers pages 1 .. pageCount.
    total = sp.comic_search('', '1')['_meta']['pageCount'] + 1
except Exception:
    # Catch Exception instead of a bare except so that KeyboardInterrupt and
    # SystemExit still propagate.
    print('查找总页数出错')

# Bounded queue of page numbers; its capacity (total * 21) is larger than the
# total - 1 items put below, so these put() calls do not block.
workQueue = queue.Queue(total * 21)
# Fill the queue with the page numbers 1 .. total - 1.
for page in range(1, total):
    workQueue.put(page)
# Starts empty; presumably filled with crawled URLs by later code -- confirm.
spiderUrls = []
# Rebinds the module attribute threading.TIMEOUT_MAX to 10.
# NOTE(review): the threading docs describe TIMEOUT_MAX as the maximum value
# accepted for blocking-call timeouts; it is unclear that rebinding it changes
# any actual timeout -- confirm the intent of this line.
threading.TIMEOUT_MAX = 10


#设置线程
class myThread(threading.Thread):
    def __init__(self, name, q, flag):
        threading.Thread.__init__(self)