def jdholder2(queue, redis_client, JDBase, outlog, cookie=None, start=10, end=40):
    """Run one queued task through ``JDBase`` and record whether it succeeded.

    Args:
        queue: Task source. ``queue.get()`` is called once and element ``[0]``
            of the returned value is used as the task id.
        redis_client: Passed to ``JDBase.set_redis_client``.
        JDBase: Fetcher object that is configured and executed here.
        outlog: Passed to ``JDBase.set_result_save_path``.
        cookie: Applied through ``JDBase.set_cookie`` only when truthy.
        start: First argument forwarded to ``JDBase.sleep``.
        end: Second argument forwarded to ``JDBase.sleep``.
    """
    fetcher = JDBase

    if cookie:
        fetcher.set_cookie(cookie)
    # Succeed/failed log paths are not configured in this variant.
    fetcher.set_redis_client(redis_client)
    fetcher.set_result_save_path(outlog)
    fetcher.set_useragent(fetch_util.get_pc_useragent())

    # The queue item is converted with byte_to_str before it is used as the
    # request path.
    raw_id = queue.get()[0]
    task_id = fetch_util.byte_to_str(raw_id)
    fetcher.set_request_path(task_id)
    fetcher.execute()
    fetch_util.print_log('process ' + fetcher.getshowlog() + '\t\n')

    # Pick the success or failure log according to the reported result.
    record = fetcher.save_succeed_log if fetcher.get_result() else fetcher.save_failed_log
    record(task_id)

    # Pause before returning to the caller.
    fetcher.sleep(start, end)
def jdholder(tasks, JDBase, succeedlog, failedlog, outlog, cookie=None, start=10, end=40):
    """Run every task in ``tasks`` through ``JDBase``, logging each outcome.

    Args:
        tasks: Sized iterable of request paths; ``len(tasks)`` is used for the
            progress counter.
        JDBase: Fetcher object that is configured once and reused per task.
        succeedlog: Passed to ``JDBase.set_succeed_log_path``.
        failedlog: Passed to ``JDBase.set_failed_log_path``.
        outlog: Passed to ``JDBase.set_result_save_path``.
        cookie: Passed to ``JDBase.set_cookie``.
        start: First argument forwarded to ``JDBase.sleep`` after each task.
        end: Second argument forwarded to ``JDBase.sleep`` after each task.
    """
    jd = JDBase

    # NOTE(review): the cookie is applied unconditionally here (the
    # ``if cookie:`` guard is commented out in the original) - confirm that
    # passing None is intended.
    jd.set_cookie(cookie)
    jd.set_succeed_log_path(succeedlog)
    jd.set_failed_log_path(failedlog)
    jd.set_result_save_path(outlog)

    total = len(tasks)
    for position, task in enumerate(tasks, 1):
        # A user agent is requested again for every task.
        jd.set_useragent(fetch_util.get_pc_useragent())
        jd.set_request_path(task)
        jd.execute()

        progress = 'process [' + str(position) + '/' + str(total) + '] ' + ' '
        fetch_util.print_log(progress + jd.getshowlog() + '\t\n')

        # Pick the success or failure log according to the reported result.
        record = jd.save_succeed_log if jd.get_result() else jd.save_failed_log
        record(task)

        # Pause between tasks.
        jd.sleep(start, end)
def jdholder2(task, JDBase, succeedlog, failedlog, outlog, cookie=None, start=10, end=40):
    """Run a single task through ``JDBase`` and record whether it succeeded.

    Args:
        task: Request path passed to ``JDBase.set_request_path`` and written
            to the succeed/failed log.
        JDBase: Fetcher object that is configured and executed here.
        succeedlog: Passed to ``JDBase.set_succeed_log_path``.
        failedlog: Passed to ``JDBase.set_failed_log_path``.
        outlog: Passed to ``JDBase.set_result_save_path``.
        cookie: Applied through ``JDBase.set_cookie`` only when truthy.
        start: First argument forwarded to ``JDBase.sleep``.
        end: Second argument forwarded to ``JDBase.sleep``.
    """
    fetcher = JDBase

    if cookie:
        fetcher.set_cookie(cookie)
    fetcher.set_succeed_log_path(succeedlog)
    fetcher.set_failed_log_path(failedlog)
    fetcher.set_result_save_path(outlog)
    fetcher.set_useragent(fetch_util.get_pc_useragent())

    fetcher.set_request_path(task)
    fetcher.execute()
    fetch_util.print_log('process ' + fetcher.getshowlog() + '\t\n')

    # Pick the success or failure log according to the reported result.
    record = fetcher.save_succeed_log if fetcher.get_result() else fetcher.save_failed_log
    record(task)

    # Pause before returning to the caller.
    fetcher.sleep(start, end)
self.save_result(currentresult + price) fetch_util.print_log(currentresult + price) self.set_print_log(requesturl + ' succeed') self.set_result(True) # 执行 def execute(self): self.parse_html_source() pass if __name__ == '__main__': htmltext = fetch_util.urlrequest( "http://p.3.cn/prices/mgets?skuIds=J_1000017,J_&type=1", None, None, fetch_util.get_pc_useragent(), None, None, 10) print(htmltext) # html = BeautifulSoup(htmltext, "html.parser") # sources = html.find("div", {'class': 'crumb fl clearfix'}) # print(sources) # divtext = sources.get_text() print(htmltext) # divtext = sources.get_text() # currentresult = captureutil.arrangement(divtext, '\n', '') # # print("1 " + divtext) # print("2 " + currentresult) # url = 'https://item.jd.com/3499302.html' #
# -*- coding: utf-8 -*-
from jumei.jumeiurlstartwithnum import JumeiUrlStartWithNum
import fetch_util
from bs4 import BeautifulSoup

# Manual run: configure one JumeiUrlStartWithNum instance and call
# findsource() for a single hard-coded request path.
if __name__ == '__main__':
    dirpath = '/Users/Lan/TestDir/output/'

    jumei = JumeiUrlStartWithNum()
    # The three log files all live under the same output directory.
    jumei.setsucceedlog(dirpath + 'sc.log')
    jumei.setfailedlog(dirpath + 'fl.log')
    jumei.setshowlog(dirpath + 'rs.log')
    jumei.setcookie(None)
    jumei.setua(fetch_util.get_pc_useragent())
    jumei.setrequestpath("847191")
    jumei.findsource()
def gethtml(self):
    """Fetch ``self.url`` and store the parsed document on ``self.html``.

    The response from ``fetch_util.urlrequest`` is parsed with BeautifulSoup's
    ``'html.parser'`` backend. Nothing is returned.
    """
    page_source = fetch_util.urlrequest(
        self.url,
        None,
        None,
        useragent=fetch_util.get_pc_useragent(),
    )
    self.html = BeautifulSoup(page_source, 'html.parser')