def main():
    """Crawl the SNU cell bank catalogue listing through the proxy pool.

    Starts the proxy helper (multi-threaded proxy IP collection that feeds
    the proxy queues), queues the 62 catalogue listing pages, launches the
    spider threads and then blocks on the URL queue's ``join()``.
    """
    checker_threads = 7
    collector_threads = 2
    spider_threads = 5

    raw_proxies = Queue(1200)
    good_proxies = Queue(1200)

    helper = Proxy_helper(
        raw_proxies, good_proxies, checker_threads, collector_threads
    )
    helper.run()
    # Fixed pause before crawling; presumably lets the proxy threads warm up.
    time.sleep(5)

    # The `start` query parameter advances in steps of 10: 0, 10, ..., 610.
    page_template = (
        "http://cellbank.snu.ac.kr/english/sub/catalog.php"
        "?start=%d&page=species&CatNo=60&qry_char=a"
    )
    pending_urls = Queue(300)
    for offset in range(0, 620, 10):
        pending_urls.put(page_template % offset)

    for n in range(spider_threads):
        Spider("数据采集线程%d" % (n), pending_urls, good_proxies).start()
        print("数据采集线程%d开启" % (n))

    # Returns once every queued URL has been marked done
    # (assumes the spiders call task_done() -- confirm in Spider).
    pending_urls.join()
def main():
    """Query 12306 ticket availability for three routes via the proxy pool.

    Starts the proxy helper (multi-threaded proxy IP collection that feeds
    the proxy queues), then starts one spider thread per route.  Each
    spider receives the shared valid-proxy queue and its own
    ``[label, query_url]`` pair.
    """
    ip_que = Queue(1200)
    validip_que = Queue(900000)
    ipCheckoutThreadMount = 5
    ipCollectThreadMount = 2

    proxy_helper = Proxy_helper(
        ip_que, validip_que, ipCheckoutThreadMount, ipCollectThreadMount
    )
    proxy_helper.run()
    # Fixed pause before crawling; presumably lets the proxy threads warm up.
    time.sleep(4)

    item_list = [
        [
            "麻城到合肥",
            "https://kyfw.12306.cn/otn/leftTicket/queryZ?leftTicketDTO.train_date=2019-02-10&leftTicketDTO.from_station=MCN&leftTicketDTO.to_station=HFH&purpose_codes=ADULT",
        ],
        [
            "麻城到杭州",
            "https://kyfw.12306.cn/otn/leftTicket/queryZ?leftTicketDTO.train_date=2019-02-10&leftTicketDTO.from_station=MCN&leftTicketDTO.to_station=HZH&purpose_codes=ADULT",
        ],
        [
            "麻城到南京",
            "https://kyfw.12306.cn/otn/leftTicket/queryZ?leftTicketDTO.train_date=2019-02-10&leftTicketDTO.from_station=MCN&leftTicketDTO.to_station=NJH&purpose_codes=ADULT",
        ],
    ]

    # One worker per route; the thread count follows the list length.
    for i, item in enumerate(item_list):
        worker = Spider("数据采集线程%d" % (i), validip_que, item)
        worker.start()
        print("数据采集线程%d开启" % (i))

    # NOTE(review): this joins the *proxy* queue, so it only returns if some
    # consumer calls task_done() for every proxy put on it -- confirm this is
    # the intended way to keep main() alive.
    validip_que.join()
def main():
    """Start the proxy helper, then launch 100 translation threads.

    Each ``TranslateUtils`` thread is handed the shared valid-proxy queue.
    The function returns right after starting the threads; it does not
    wait for them.
    """
    collector_threads = 2
    checker_threads = 10

    raw_proxies = Queue(1200)
    good_proxies = Queue(10000)

    helper = Proxy_helper(
        raw_proxies, good_proxies, checker_threads, collector_threads
    )
    helper.run()
    # Fixed pause before translating; presumably lets the proxy threads warm up.
    time.sleep(10)

    for n in range(100):
        TranslateUtils("翻译线程%d" % (n), good_proxies).start()
def main():
    """Collect image sources and download them through the proxy pool.

    Starts the image-info collector thread (given ``imageInfo_que``), then
    the proxy helper (multi-threaded proxy IP collection that feeds the
    proxy queues), then the download threads, and finally waits for all
    download threads to finish.
    """
    ip_que = Queue(1200)
    validip_que = Queue(10000)
    imageInfo_que = Queue(700000)
    ipCheckoutThreadMount = 5
    ipCollectThreadMount = 1
    dataCollectThreadMount = 3

    imageInfoCollecter = ImageInfoCollecter("图片src采集线程", imageInfo_que)
    imageInfoCollecter.start()

    proxy_helper = Proxy_helper(
        ip_que, validip_que, ipCheckoutThreadMount, ipCollectThreadMount
    )
    proxy_helper.run()
    # Fixed pause before downloading; presumably lets the proxy threads warm up.
    time.sleep(4)

    # Start every downloader before joining any of them, so they run
    # concurrently and main() waits for all of them, not just one.
    workers = []
    for i in range(dataCollectThreadMount):
        worker = Spider("图片下载线程%d" % (i), imageInfo_que, validip_que)
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join()
def main():
    """Crawl a list of URLs through the proxy pool.

    Starts the proxy helper (multi-threaded proxy IP collection that feeds
    the proxy queues), queues the URLs, launches the spider threads and
    then blocks on the URL queue's ``join()``.  The URL list is empty in
    this version, so nothing is queued.
    """
    checker_threads = 20
    collector_threads = 3
    spider_threads = 40

    raw_proxies = Queue(1200)
    good_proxies = Queue(10000)

    helper = Proxy_helper(
        raw_proxies, good_proxies, checker_threads, collector_threads
    )
    helper.run()
    # Fixed pause before crawling; presumably lets the proxy threads warm up.
    time.sleep(20)

    # Placeholder: fill in the URLs to crawl.
    targets = []
    pending_urls = Queue(80000)
    for target in targets:
        pending_urls.put(target)

    for n in range(spider_threads):
        Spider("数据采集线程%d" % (n), pending_urls, good_proxies).start()
        print("数据采集线程%d开启" % (n))

    pending_urls.join()
def main():
    """Re-crawl the URLs in ``failed_category_list_url`` via the proxy pool.

    Starts the proxy helper (multi-threaded proxy IP collection that feeds
    the proxy queues), queues every URL, prints the queue size, launches
    the spider threads and then blocks on the URL queue's ``join()``.
    """
    checker_threads = 25
    collector_threads = 3
    spider_threads = 60

    raw_proxies = Queue(1200)
    good_proxies = Queue(100000)

    helper = Proxy_helper(
        raw_proxies, good_proxies, checker_threads, collector_threads
    )
    helper.run()
    # Fixed pause before crawling; presumably lets the proxy threads warm up.
    time.sleep(10)

    pending_urls = Queue(5000)
    for target in failed_category_list_url:
        pending_urls.put(target)
    # Report how many URLs are waiting before the workers start.
    print(pending_urls.qsize())

    for n in range(spider_threads):
        Spider("数据采集线程{}".format(n), pending_urls, good_proxies).start()
        print("数据采集线程{}开启".format(n))

    pending_urls.join()
def main():
    """Crawl the URLs listed in ``urls.txt`` through the proxy pool.

    Starts the proxy helper (multi-threaded proxy IP collection that feeds
    the proxy queues), loads one URL per line from ``urls.txt``, queues
    them, launches the spider threads and then blocks on the URL queue's
    ``join()``.

    Blank lines (including the empty entry a trailing newline would
    produce) are skipped and surrounding whitespace is stripped, so no
    empty URL is handed to a spider.
    """
    ip_que = Queue(1200)
    validip_que = Queue(10000)
    ipCheckoutThreadMount = 30
    ipCollectThreadMount = 3
    dataCollectThreadMount = 100

    proxy_helper = Proxy_helper(
        ip_que, validip_que, ipCheckoutThreadMount, ipCollectThreadMount
    )
    proxy_helper.run()
    # Fixed pause before crawling; presumably lets the proxy threads warm up.
    time.sleep(40)

    # Read the whole file first so it is closed before the queue is filled.
    with open('urls.txt', 'r') as f:
        stripped_lines = [line.strip() for line in f]

    url_que = Queue(80000)
    for url in stripped_lines:
        if url:
            url_que.put(url)

    for i in range(dataCollectThreadMount):
        worker = Spider("数据采集线程%d" % (i), url_que, validip_que)
        worker.start()

    url_que.join()
def main():
    """Crawl huiyi8.com emoticon listing pages 1-19 through the proxy pool.

    Starts the proxy helper (multi-threaded proxy IP collection that feeds
    the proxy queues), queues the listing pages, launches the spider
    threads and then blocks on the URL queue's ``join()``.
    """
    checker_threads = 50
    collector_threads = 2
    spider_threads = 89

    raw_proxies = Queue(1200)
    good_proxies = Queue(10000)

    helper = Proxy_helper(
        raw_proxies, good_proxies, checker_threads, collector_threads
    )
    helper.run()
    # Fixed pause before crawling; presumably lets the proxy threads warm up.
    time.sleep(5)

    pending_urls = Queue(1000)
    for page in range(1, 20):
        pending_urls.put("https://www.huiyi8.com/qqbq/{}.html".format(page))

    for n in range(spider_threads):
        Spider("数据采集线程%d" % (n), pending_urls, good_proxies).start()
        print("数据采集线程%d开启" % (n))

    pending_urls.join()
def main():
    """Crawl bee-ji.com detail pages 10000-19999 through the proxy pool.

    Starts the proxy helper (multi-threaded proxy IP collection that feeds
    the proxy queues), queues the detail pages, launches the spider
    threads and then blocks on the URL queue's ``join()``.
    """
    checker_threads = 10
    collector_threads = 1
    spider_threads = 20

    raw_proxies = Queue(1200)
    good_proxies = Queue(100000)

    helper = Proxy_helper(
        raw_proxies, good_proxies, checker_threads, collector_threads
    )
    helper.run()
    # Fixed pause before crawling; presumably lets the proxy threads warm up.
    time.sleep(20)

    pending_urls = Queue(210000)
    for detail_id in range(10000, 20000):
        pending_urls.put("http://www.bee-ji.com/detail/%d.html" % (detail_id))

    for n in range(spider_threads):
        Spider("数据采集线程%d" % (n), pending_urls, good_proxies).start()
        print("数据采集线程%d开启" % (n))

    pending_urls.join()
def main():
    """Crawl doutula.com article listing pages 320-602 via the proxy pool.

    Starts the proxy helper (multi-threaded proxy IP collection that feeds
    the proxy queues), queues the listing pages, launches the spider
    threads and then blocks on the URL queue's ``join()``.
    """
    checker_threads = 20
    collector_threads = 2
    spider_threads = 60

    raw_proxies = Queue(1200)
    good_proxies = Queue(10000)

    helper = Proxy_helper(
        raw_proxies, good_proxies, checker_threads, collector_threads
    )
    helper.run()
    # Fixed pause before crawling; presumably lets the proxy threads warm up.
    time.sleep(1)

    # Contiguous page range; to re-crawl individual pages, queue them here
    # instead.
    pending_urls = Queue(1000)
    for page in range(320, 603):
        pending_urls.put("https://www.doutula.com/article/list/?page=%d" % (page))

    for n in range(spider_threads):
        Spider("数据采集线程%d" % (n), pending_urls, good_proxies).start()
        print("数据采集线程%d开启" % (n))

    pending_urls.join()
def main():
    """Crawl lanrentuku.com emoticon listing pages 1-44 via the proxy pool.

    Starts the proxy helper (multi-threaded proxy IP collection that feeds
    the proxy queues), queues the listing pages, launches the spider
    threads and then blocks on the URL queue's ``join()``.
    """
    checker_threads = 20
    collector_threads = 2
    spider_threads = 44

    raw_proxies = Queue(1200)
    good_proxies = Queue(10000)

    helper = Proxy_helper(
        raw_proxies, good_proxies, checker_threads, collector_threads
    )
    helper.run()
    # Fixed pause before crawling; presumably lets the proxy threads warm up.
    time.sleep(10)

    # The category segment is already percent-encoded, so the page number is
    # filled in with str.format rather than %-formatting.
    page_template = "http://www.lanrentuku.com/sort/%B1%ED%C7%E9%B0%FC/{}/"
    pending_urls = Queue(1000)
    for page in range(1, 45):
        pending_urls.put(page_template.format(page))

    for n in range(spider_threads):
        Spider("数据采集线程%d" % (n), pending_urls, good_proxies).start()
        print("数据采集线程%d开启" % (n))

    pending_urls.join()
def main():
    """Crawl a fixed list of qqjia.com emoticon pages via the proxy pool.

    Starts the proxy helper (multi-threaded proxy IP collection that feeds
    the proxy queues), queues the pages in list order, launches the spider
    threads and then blocks on the URL queue's ``join()``.
    """
    checker_threads = 40
    collector_threads = 2
    spider_threads = 100

    raw_proxies = Queue(1200)
    good_proxies = Queue(10000)

    helper = Proxy_helper(
        raw_proxies, good_proxies, checker_threads, collector_threads
    )
    helper.run()
    # Fixed pause before crawling; presumably lets the proxy threads warm up.
    time.sleep(20)

    # Explicit detail pages, in the order they are queued.
    targets = [
        "http://www.qqjia.com//htm/hao24966.htm",
        "http://www.qqjia.com//htm/hao24965.htm",
        "http://www.qqjia.com//htm/hao24964.htm",
        "http://www.qqjia.com//htm/hao24847.htm",
        "http://www.qqjia.com//htm/hao24846.htm",
        "http://www.qqjia.com//htm/hao24845.htm",
        "http://www.qqjia.com//htm/hao24798.htm",
        "http://www.qqjia.com//learn/qiubilong.htm",
        "http://qq.qqjia.com/bq/ali.htm",
        "http://qq.qqjia.com/bq/dingdang.htm",
        "http://qq.qqjia.com/bq/zhaocai.htm",
        "http://qq.qqjia.com/bq/dubao.htm",
        "http://www.qqjia.com//learn/fayuebing.htm",
        "http://www.qqjia.com//learn/bqjr41.htm",
        "http://qq.qqjia.com/bq/xiaoyaoji.htm",
        "http://qq.qqjia.com/bq/egao.htm",
        "http://www.qqjia.com//learn/bqjr101.htm",
        "http://www.qqjia.com//learn/bqjr1225.htm",
        "http://qq.qqjia.com/bq/christmas2.htm",
        "http://www.qqjia.com//learn/HMstyle.htm",
        "http://www.qqjia.com//learn/bqjr214.htm",
        "http://qq.qqjia.com/bq/yuanfang.htm",
        "http://qq.qqjia.com/bq/aduo.htm",
        "http://qq.qqjia.com/bq/christmas.htm",
        "http://qq.qqjia.com/bq/jr1111b.htm",
        "http://qq.qqjia.com/bq/wodi.htm",
        "http://qq.qqjia.com/bq/riben.htm",
        "http://qq.qqjia.com/bq/xxhu.htm",
        "http://www.qqjia.com//learn/bqjr1001.htm",
        "http://qq.qqjia.com/bq/baozou.htm",
        "http://qq.qqjia.com/bq/guoqing.htm",
        "http://qq.qqjia.com/bq/jr910b.htm",
        "http://qq.qqjia.com/bq/zhongqiu.htm",
        "http://qq.qqjia.com/bq/jr77b.htm",
        "http://www.qqjia.com//learn/bqjr910.htm",
        "http://qq.qqjia.com/bq/wabishi.htm",
        "http://www.qqjia.com//learn/bq28.htm",
        "http://www.qqjia.com//learn/bq26.htm",
        "http://www.qqjia.com//learn/bq21.htm",
        "http://www.qqjia.com//learn/bq20.htm",
        "http://www.qqjia.com//learn/bq10.htm",
        "http://www.qqjia.com//learn/bq06.htm",
        "http://www.qqjia.com//learn/bq09.htm",
        "http://www.qqjia.com//learn/bq03.htm",
        "http://qq.qqjia.com/bq/mogui.htm",
        "http://qq.qqjia.com/bq/jr1031b.htm",
        "http://qq.qqjia.com/bq/ge.htm",
        "http://qq.qqjia.com/bq/xigua.htm",
        "http://qq.qqjia.com/bq/yu.htm",
        "http://qq.qqjia.com/bq/pitou.htm",
        "http://qq.qqjia.com/bq/danwen.htm",
        "http://qq.qqjia.com/bq/danhuang.htm",
        "http://qq.qqjia.com/bq/wulagui.htm",
        "http://qq.qqjia.com/bq/Sinbo.htm",
        "http://qq.qqjia.com/bq/ruiqie.htm",
        "http://qq.qqjia.com/bq/miqi.htm",
        "http://qq.qqjia.com/bq/chaoren.htm",
        "http://qq.qqjia.com/bq/spl.htm",
        "http://qq.qqjia.com/bq/zgl.htm",
        "http://qq.qqjia.com/bq/612.htm",
        "http://qq.qqjia.com/bq/xyq.htm",
        "http://qq.qqjia.com/bq/tuzibang.htm",
        "http://www.qqjia.com//learn/bq27.htm",
        "http://qq.qqjia.com/bq/Copycat.htm",
        "http://qq.qqjia.com/bq/sha.htm",
        "http://qq.qqjia.com/bq/habao.htm",
        "http://qq.qqjia.com/bq/qqnc.htm",
        "http://qq.qqjia.com/bq/qunzhu.htm",
        "http://qq.qqjia.com/bq/paobin3.htm",
        "http://qq.qqjia.com/bq/paobin2.htm",
        "http://qq.qqjia.com/bq/paobin1.htm",
        "http://qq.qqjia.com/bq/pangxie.htm",
        "http://qq.qqjia.com/bq/shengri.htm",
        "http://qq.qqjia.com/bq/xingmao.htm",
        "http://qq.qqjia.com/bq/shua.htm",
        "http://qq.qqjia.com/bq/yoci2.htm",
        "http://qq.qqjia.com/bq/yoci.htm",
        "http://qq.qqjia.com/bq/zhuai.htm",
        "http://qq.qqjia.com/bq/bbgou.htm",
        "http://qq.qqjia.com/bq/ziziji.htm",
        "http://qq.qqjia.com/bq/tusiji.htm",
        "http://qq.qqjia.com/bq/aoyun.htm",
        "http://qq.qqjia.com/bq/qq2007.htm",
        "http://qq.qqjia.com/bq/bijia.htm",
        "http://qq.qqjia.com/bq/bianxing.htm",
        "http://qq.qqjia.com/bq/mogutou.htm",
        "http://qq.qqjia.com/bq/zhumm.htm",
        "http://qq.qqjia.com/bq/benko.htm",
        "http://qq.qqjia.com/bq/hundun.htm",
        "http://qq.qqjia.com/bq/PUCCA.htm",
        "http://qq.qqjia.com/bq/gxylk.htm",
        "http://qq.qqjia.com/bq/yongbao.htm",
        "http://qq.qqjia.com/bq/iPadQQ.htm",
        "http://qq.qqjia.com/bq/bainian.htm",
        "http://qq.qqjia.com/bq/jr214b.htm",
        "http://qq.qqjia.com/bq/keaidw.htm",
        "http://qq.qqjia.com/bq/keaidw2.htm",
        "http://qq.qqjia.com/bq/kjqd.htm",
        "http://qq.qqjia.com/bq/zhongqiu2.htm",
        "http://qq.qqjia.com/bq/xiyangyang.htm",
        "http://qq.qqjia.com/bq/qiu.htm",
        "http://www.qqjia.com//learn/bqjr77.htm",
        "http://qq.qqjia.com/bq/xcb.htm",
        "http://qq.qqjia.com/bq/jr41b.htm",
        "http://qq.qqjia.com/bq/labi.htm",
        "http://qq.qqjia.com/bq/chaye.htm",
        "http://qq.qqjia.com/bq/jr38b.htm",
        "http://qq.qqjia.com/bq/chunjie.htm",
        "http://qq.qqjia.com/bq/xcj.htm",
        "http://qq.qqjia.com/bq/ytmm.htm",
        "http://qq.qqjia.com/bq/jiaozi.htm",
        "http://qq.qqjia.com/bq/yutumei.htm",
        "http://qq.qqjia.com/bq/fangkuai.htm",
        "http://qq.qqjia.com/bq/momo.htm",
        "http://qq.qqjia.com/bq/haidi.htm",
        "http://qq.qqjia.com/bq/duyan.htm",
        "http://www.qqjia.com//htm/hao33742.htm",
        "http://www.qqjia.com//learn/bqjr11.htm",
        "http://qq.qqjia.com/bq/jr11b.htm",
        "http://qq.qqjia.com/bq/yuandan.htm",
        "http://www.qqjia.com//learn/bqjr815.htm",
        "http://www.qqjia.com//htm/hao29095.htm",
        "http://www.qqjia.com//htm/hao29013.htm",
        "http://www.qqjia.com//htm/hao29012.htm",
        "http://www.qqjia.com//htm/hao27510.htm",
        "http://www.qqjia.com//htm/hao27509.htm",
        "http://www.qqjia.com//htm/hao26984.htm",
        "http://www.qqjia.com//htm/hao26983.htm",
        "http://www.qqjia.com//htm/hao25819.htm",
        "http://www.qqjia.com//htm/hao25817.htm",
        "http://www.qqjia.com//htm/hao25816.htm",
        "http://www.qqjia.com//htm/hao25725.htm",
        "http://www.qqjia.com//htm/hao25724.htm",
        "http://www.qqjia.com//htm/hao25675.htm",
        "http://www.qqjia.com//htm/hao25674.htm",
        "http://www.qqjia.com//htm/hao25673.htm",
        "http://www.qqjia.com//htm/hao25672.htm",
        "http://www.qqjia.com//htm/hao25568.htm",
        "http://www.qqjia.com//htm/hao25567.htm",
        "http://www.qqjia.com//htm/hao25425.htm",
        "http://qq.qqjia.com/bq/zhenggu.htm",
        "http://qq.qqjia.com/bq/kongbu.htm",
        "http://qq.qqjia.com/bq/yeman.htm",
        "http://qq.qqjia.com/bq/qqqun.htm",
        "http://qq.qqjia.com/bq/pohai.htm",
        "http://qq.qqjia.com/bq/zaobs.htm",
        "http://qq.qqjia.com/bq/dianhou.htm",
        "http://qq.qqjia.com/bq/hongyt.htm",
        "http://qq.qqjia.com/bq/milaoshu.htm",
        "http://qq.qqjia.com/bq/jr11zhu.htm",
        "http://qq.qqjia.com/bq/kitten.htm",
        "http://qq.qqjia.com/bq/gxbq.htm",
        "http://qq.qqjia.com/bq/petzhu.htm",
        "http://qq.qqjia.com/bq/2006logo.htm",
        "http://qq.qqjia.com/bq/youxi.htm",
        "http://qq.qqjia.com/bq/leonc.htm",
        "http://qq.qqjia.com/bq/leonb.htm",
        "http://qq.qqjia.com/bq/leona.htm",
        "http://qq.qqjia.com/bq/tudou.htm",
        "http://qq.qqjia.com/bq/Dori.htm",
        "http://qq.qqjia.com/bq/xihahou.htm",
        "http://qq.qqjia.com/bq/keaikonlon.htm",
        "http://qq.qqjia.com/bq/xiaoqiang.htm",
        "http://qq.qqjia.com/bq/keaihg.htm",
        "http://qq.qqjia.com/bq/baoer.htm",
        "http://qq.qqjia.com/bq/xiongmao.htm",
        "http://qq.qqjia.com/bq/xiaojiji.htm",
        "http://qq.qqjia.com/bq/shoushi.htm",
        "http://qq.qqjia.com/bq/Mocmoc.htm",
        "http://qq.qqjia.com/bq/caizhong.htm",
        "http://qq.qqjia.com/bq/caiabc.htm",
        "http://qq.qqjia.com/bq/kaka.htm",
        "http://qq.qqjia.com/bq/huangdi.htm",
        "http://qq.qqjia.com/bq/box.htm",
        "http://qq.qqjia.com/bq/huoju.htm",
        "http://qq.qqjia.com/bq/jxry.htm",
        "http://qq.qqjia.com/bq/hellocai.htm",
        "http://qq.qqjia.com/bq/gupiao.htm",
        "http://qq.qqjia.com/bq/upup.htm",
        "http://qq.qqjia.com/bq/xiaohuimao.htm",
        "http://qq.qqjia.com/bq/naitouzai.htm",
        "http://qq.qqjia.com/bq/lvtoujin.htm",
        "http://qq.qqjia.com/bq/yangcong.htm",
        "http://qq.qqjia.com/bq/moguai.htm",
        "http://qq.qqjia.com/bq/caicai.htm",
        "http://qq.qqjia.com/bq/quhou.htm",
        "http://qq.qqjia.com/bq/qumao.htm",
        "http://qq.qqjia.com/bq/qutu.htm",
        "http://www.qqjia.com//learn/bqjr520.htm",
        "http://www.qqjia.com//learn/bq12.htm",
        "http://www.qqjia.com//learn/bq05.htm",
        "http://www.qqjia.com//learn/bq13.htm",
        "http://www.qqjia.com//learn/bq14.htm",
        "http://www.qqjia.com//learn/bq15.htm",
        "http://www.qqjia.com//learn/bq04.htm",
        "http://qq.qqjia.com/bq/gongxi.htm",
        "http://www.qqjia.com//learn/bq17.htm",
        "http://www.qqjia.com//learn/bq18.htm",
        "http://www.qqjia.com//learn/bq19.htm",
        "http://www.qqjia.com//learn/bq23.htm",
        "http://www.qqjia.com//learn/bq22.htm",
        "http://www.qqjia.com//learn/bq25.htm",
        "http://www.qqjia.com//learn/bqsr.htm",
        "http://www.qqjia.com//learn/bq24.htm",
        "http://www.qqjia.com//learn/bqmms1.htm",
        "http://www.qqjia.com//learn/bqmms2.htm",
        "http://www.qqjia.com//learn/bqmms3.htm",
        "http://www.qqjia.com//learn/bqmms4.htm",
        "http://www.qqjia.com//learn/bqmms5.htm",
        "http://www.qqjia.com//learn/bqmms6.htm",
        "http://www.qqjia.com//learn/bqmms7.htm",
        "http://www.qqjia.com//learn/bqmms8.htm",
        "http://qq.qqjia.com/bq/qingwa.htm",
        "http://qq.qqjia.com/bq/zhutou.htm",
        "http://qq.qqjia.com/bq/heibaizhu.htm",
        "http://qq.qqjia.com/bq/huahua.htm",
        "http://qq.qqjia.com/bq/chaonv.htm",
        "http://qq.qqjia.com/bq/chaonv2.htm",
        "http://qq.qqjia.com/bq/aishui.htm",
        "http://qq.qqjia.com/bq/coolfeng3.htm",
        "http://qq.qqjia.com/bq/coolfeng2.htm",
        "http://qq.qqjia.com/bq/xiaohai.htm",
        "http://qq.qqjia.com/bq/set3.htm",
        "http://qq.qqjia.com/bq/cywenzi.htm",
        "http://qq.qqjia.com/bq/jdrenwu.htm",
        "http://qq.qqjia.com/bq/niuniu.htm",
        "http://qq.qqjia.com/bq/shm.htm",
        "http://qq.qqjia.com/bq/datou.htm",
        "http://qq.qqjia.com/bq/baozi.htm",
        "http://qq.qqjia.com/bq/yctdtt.htm",
        "http://qq.qqjia.com/bq/keainv.htm",
        "http://qq.qqjia.com/bq/tqqq.htm",
        "http://qq.qqjia.com/bq/tqqq2.htm",
        "http://qq.qqjia.com/bq/xingfa.htm",
        "http://qq.qqjia.com/bq/suihai.htm",
        "http://qq.qqjia.com/bq/sanmao.htm",
        "http://qq.qqjia.com/bq/bao.htm",
        "http://qq.qqjia.com/bq/kittenxin.htm",
        "http://qq.qqjia.com/bq/kittenbaozi.htm",
        "http://qq.qqjia.com/bq/kittenyuqi.htm",
        "http://qq.qqjia.com/bq/wanwan.htm",
        "http://www.qqjia.com//learn/bq16.htm",
        "http://qq.qqjia.com/bq/fuqin.htm",
        "http://qq.qqjia.com/bq/boto.htm",
        "http://qq.qqjia.com/bq/jr61b.htm",
        "http://qq.qqjia.com/bq/yinshi.htm",
        "http://qq.qqjia.com/bq/kittenzimu.htm",
        "http://qq.qqjia.com/bq/gxcx.htm",
        "http://qq.qqjia.com/bq/xiaoqq.htm",
        "http://qq.qqjia.com/bq/maomi.htm",
        "http://qq.qqjia.com/bq/rmb.htm",
        "http://qq.qqjia.com/bq/jss.htm",
        "http://qq.qqjia.com/bq/zxw.htm",
        "http://qq.qqjia.com/bq/xiaoji.htm",
        "http://qq.qqjia.com/bq/zhenzi.htm",
        "http://www.qqjia.com//learn/bqjr51.htm",
        "http://qq.qqjia.com/bq/jr51b.htm",
        "http://qq.qqjia.com/bq/konglong.htm",
        "http://qq.qqjia.com/bq/guoqing2.htm",
        "http://qq.qqjia.com/bq/ktmb.htm",
        "http://www.qqjia.com//learn/bqjr61.htm",
        "http://qq.qqjia.com/bq/qggqmm.htm",
        "http://www.qqjia.com//learn/laobq.htm",
        "http://qq.qqjia.com/bq/wsxiong.htm",
        "http://www.qqjia.com//learn/bqjr38.htm",
        "http://qq.qqjia.com/bq/erka.htm",
        "http://qq.qqjia.com/bq/jr520b.htm",
        "http://qq.qqjia.com/bq/jieji.htm",
        "http://qq.qqjia.com/bq/jr55b.htm",
        "http://qq.qqjia.com/bq/tuzi.htm",
        "http://www.qqjia.com//learn/bqjr620.htm",
        "http://qq.qqjia.com/bq/shengxiao.htm",
        "http://www.qqjia.com//learn/bqjr55.htm",
        "http://qq.qqjia.com/bq/youzi.htm",
        "http://www.qqjia.com//learn/bq01.htm",
        "http://qq.qqjia.com/bq/qqpet.htm",
        "http://www.qqjia.com//learn/bq02.htm",
        "http://www.qqjia.com//learn/bqjr115.htm",
        "http://www.qqjia.com//learn/bq07.htm",
        "http://www.qqjia.com//learn/bq08.htm",
        "http://www.qqjia.com//learn/bq11.htm",
        "http://qq.qqjia.com/bq/jr115b.htm",
        "http://qq.qqjia.com/bq/wsm.htm",
        "http://qq.qqjia.com/bq/haha.htm",
        "http://qq.qqjia.com/bq/aoao.htm",
        "http://qq.qqjia.com/bq/pb.htm",
        "http://qq.qqjia.com/bq/moumouniu.htm",
        "http://qq.qqjia.com/bq/woniu.htm",
        "http://qq.qqjia.com/bq/zhuzhuxia.htm",
        "http://qq.qqjia.com/bq/maomaoshu.htm",
        "http://qq.qqjia.com/bq/maomaoshu3D.htm",
        "http://qq.qqjia.com/bq/xiaobai.htm",
        "http://qq.qqjia.com/bq/kaka2.htm",
        "http://qq.qqjia.com/bq/diandian.htm",
        "http://qq.qqjia.com/bq/txb.htm",
        "http://qq.qqjia.com/bq/xsy.htm",
        "http://qq.qqjia.com/bq/xiaoK.htm",
        "http://qq.qqjia.com/bq/SuperPower.htm",
        "http://qq.qqjia.com/bq/wanggou.htm",
        "http://qq.qqjia.com/bq/zfg.htm",
        "http://qq.qqjia.com/bq/qbaycat.htm",
        "http://qq.qqjia.com/bq/youa.htm",
        "http://qq.qqjia.com/bq/jiujiu.htm",
        "http://qq.qqjia.com/bq/qun.htm",
        "http://qq.qqjia.com/bq/zheng.htm",
        "http://qq.qqjia.com/bq/yiwai.htm",
        "http://www.qqjia.com//htm/hao25424.htm",
        "http://www.qqjia.com//htm/hao25423.htm",
        "http://www.qqjia.com//htm/hao25422.htm",
        "http://www.qqjia.com//htm/hao24967.htm",
    ]

    pending_urls = Queue(1000)
    for target in targets:
        pending_urls.put(target)

    for n in range(spider_threads):
        Spider("数据采集线程%d" % (n), pending_urls, good_proxies).start()
        print("数据采集线程%d开启" % (n))

    pending_urls.join()