import datetime
import time

# Project helpers (BaseHttpGet, MyThreadPool, tbHttp, tbDao, tbpool, chinaCity,
# xp1024Http, models, sched, do_http, xp_base_url) are assumed to be imported
# or defined elsewhere in this module.


def xp1024_search_job():
    print(time.strftime("%d %H:%M:%S", time.localtime(time.time())), "xp1024_search_job starting----")
    # Enqueue the first 9 list pages of each forum section (fid -> the site's category label).
    sections = [(5, "亚洲无码"), (22, "日本骑兵"), (7, "歐美新片")]
    for fid, pub_type in sections:
        for page in range(1, 10):
            http = xp1024Http.xp1024_list_crawer()
            http.url = xp_base_url + "/pw/thread.php?fid=%d&page=%d" % (fid, page)
            http.pub_type = pub_type
            BaseHttpGet.pushHttpGet(http)
    # Process both queues with a 2-thread pool: drain the list-page queue first,
    # then the detail-page queue that the list crawlers feed.
    tpool = MyThreadPool.MyThreadPool(2)
    for i in range(10000):
        if BaseHttpGet.getHttpGetPoolCount(xp1024Http.xp1024_list_crawer.__name__) == 0:
            break
        tpool.callInThread(do_http, xp1024Http.xp1024_list_crawer.__name__)
    for i in range(10000):
        if BaseHttpGet.getHttpGetPoolCount(xp1024Http.xp1024_info_crawer.__name__) == 0:
            break
        tpool.callInThread(do_http, xp1024Http.xp1024_info_crawer.__name__)
    tpool.wait()
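# do_http is defined elsewhere in the project. A minimal sketch of what such a
# worker could look like, assuming a hypothetical BaseHttpGet.popHttpGet that
# dequeues one pending request for the given crawler class, and a hypothetical
# execute() entry point on the crawler object; neither name is confirmed by
# this section.
def do_http_sketch(class_name):
    http = BaseHttpGet.popHttpGet(class_name)  # hypothetical dequeue call
    if http is None:
        return
    try:
        http.execute()  # hypothetical: fetch http.url and parse/persist the result
    except Exception as e:
        # One plausible policy: re-queue failed requests for a later attempt.
        print("do_http failed:", class_name, e)
        BaseHttpGet.pushHttpGet(http)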
def update_prod_item_job():
    print(time.strftime("%d %H:%M:%S", time.localtime(time.time())), "update_prod_item_job starting----")
    job = sched.get_job(job_id="update_prod_item_job")
    # Deadline encoded as a YYYYMMDDHHMMSS integer; the subtraction below only
    # approximates seconds across minute/hour boundaries.
    next_run = int(job.next_run_time.strftime('%Y%m%d%H%M%S'))
    class_name = tbHttp.TBProdItemCrawer.__name__
    # Work the queue with 10 threads until shortly before the next scheduled run.
    tpool = MyThreadPool.MyThreadPool(10)
    for i in range(2000):
        now = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        if next_run - now < 7:
            print(
                time.strftime("%d %H:%M:%S", time.localtime(time.time())), i,
                "update_prod_item_job finished--------------------------------------------------------"
            )
            return
        tpool.callInThread(do_http, class_name)
    # Iteration budget used up before the deadline: refill the queue with a
    # batch of products whose shop id has not been resolved yet.
    prods = models.TTbShopProd.objects.filter(shopid=None)[0:5000]
    for p in prods:
        http = tbHttp.TBProdItemCrawer()
        http.product_id = p.product_id
        http.uid = p.uid
        BaseHttpGet.pushHttpGet(http)
    print(time.strftime("%d %H:%M:%S", time.localtime(time.time())), "update_prod_item_job ended early----")
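# MyThreadPool is a project helper exposing callInThread/wait. A minimal
# stand-in with the same surface, sketched on concurrent.futures; this is an
# assumption about its behavior, not the project's actual implementation.
from concurrent.futures import ThreadPoolExecutor

class MyThreadPoolSketch:
    def __init__(self, size):
        self._executor = ThreadPoolExecutor(max_workers=size)
        self._futures = []

    def callInThread(self, fn, *args):
        # Run fn(*args) on one of the pool's worker threads.
        self._futures.append(self._executor.submit(fn, *args))

    def wait(self):
        # Block until every submitted task has finished, then shut down.
        for f in self._futures:
            f.result()
        self._executor.shutdown()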
def prod_search_job():
    print(time.strftime("%d %H:%M:%S", time.localtime(time.time())), "prod_search_job starting----")
    job = sched.get_job(job_id="prod_search_job")
    next_run = int(job.next_run_time.strftime('%Y%m%d%H%M%S'))
    class_name = tbHttp.TBProdSearchCrawer.__name__
    count = BaseHttpGet.getHttpGetPoolCount(class_name)
    # If the queue is running low, a refill could happen here (left as a stub
    # in the original; the actual refill happens after the work loop below).
    if count < 10:
        pass
    # Work the queue with 40 threads until shortly before the next scheduled run.
    tpool = MyThreadPool.MyThreadPool(40)
    for i in range(10000):
        now = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        if next_run - now < 3:
            print(
                time.strftime("%d %H:%M:%S", time.localtime(time.time())), i,
                "prod_search_job finished--------------------------------------------------------"
            )
            return
        tpool.callInThread(do_http, class_name)
    # Iteration budget used up before the deadline: enqueue a fresh batch of
    # search queries, skipping keys that are already known.
    qlist = tbDao.random_prod_name()
    city = chinaCity.getFristCity()
    for q in qlist:
        if tbpool.ProdQuerykeyExist(q):
            continue
        prod = tbHttp.TBProdSearchCrawer()
        prod.pageno = 1
        prod.q = q
        prod.city = city
        BaseHttpGet.pushHttpGet(prod)
    print(time.strftime("%d %H:%M:%S", time.localtime(time.time())), "prod_search_job ended early----")
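# The jobs above approximate "seconds until the next run" by subtracting
# YYYYMMDDHHMMSS integers, which is only roughly right across minute/hour
# boundaries (e.g. 20240101120000 - 20240101115958 = 4042, though only
# 2 seconds elapsed). An exact alternative using datetime arithmetic, shown
# only as a sketch:
def seconds_until(next_run_time):
    # Use an aware "now" when next_run_time is timezone-aware (as with
    # APScheduler); now(None) degrades gracefully to naive local time.
    now = datetime.datetime.now(next_run_time.tzinfo)
    return (next_run_time - now).total_seconds()

# e.g. inside a work loop: if seconds_until(job.next_run_time) < 3: return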
def update_shop_create_time_job():
    print(time.strftime("%d %H:%M:%S", time.localtime(time.time())), "update_shop_create_time_job starting----")
    job = sched.get_job(job_id="update_shop_create_time_job")
    next_run = int(job.next_run_time.strftime('%Y%m%d%H%M%S'))
    class_name = tbHttp.TBShopCreateTimeCrawer.__name__
    count = BaseHttpGet.getHttpGetPoolCount(class_name)
    # If the queue is empty, enqueue a batch of shops that still lack a creation time.
    if count == 0:
        shops = models.TTbShop.objects.filter(shop_createtime=None)[0:5000]
        for shop in shops:
            http = tbHttp.TBShopCreateTimeCrawer()
            http.shopid = shop.shopid
            http.isProxy = True
            BaseHttpGet.pushHttpGet(http)
    # Work the queue with 5 threads until shortly before the next scheduled run.
    tpool = MyThreadPool.MyThreadPool(5)
    for i in range(10000):
        now = int(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        if next_run - now < 10:
            print(
                time.strftime("%d %H:%M:%S", time.localtime(time.time())), i,
                "update_shop_create_time_job finished--------------------------------------------------------"
            )
            return
        tpool.callInThread(do_http, class_name)
    print(time.strftime("%d %H:%M:%S", time.localtime(time.time())), "update_shop_create_time_job ended early----")
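# The sched object the jobs query is not defined in this section. Its
# get_job(job_id=...) / next_run_time usage matches APScheduler, so a minimal
# wiring sketch under that assumption; the intervals are illustrative, but each
# id must equal the job_id string the corresponding function looks up.
from apscheduler.schedulers.background import BackgroundScheduler

sched = BackgroundScheduler()
sched.add_job(xp1024_search_job, 'interval', hours=6, id='xp1024_search_job')
sched.add_job(prod_search_job, 'interval', minutes=10, id='prod_search_job')
sched.add_job(update_prod_item_job, 'interval', minutes=30, id='update_prod_item_job')
sched.add_job(update_shop_create_time_job, 'interval', minutes=30, id='update_shop_create_time_job')
sched.start()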