예제 #1
0
파일: jdutil.py 프로젝트: lancong/Spiders
def jdholder2(queue, redis_client, JDBase, outlog, cookie=None, start=10, end=40):
    """Pull one task id from *queue*, crawl it with *JDBase*, and record the outcome.

    Wires the crawler to *redis_client* and *outlog*, installs *cookie* when
    given plus a fresh PC user agent, executes a single request for the task
    id taken from *queue*, writes a success/failure flag for that id, then
    calls ``JDBase.sleep(start, end)`` before returning.
    """
    if cookie:
        JDBase.set_cookie(cookie)

    # Attach the crawler's sinks and identity before issuing the request.
    JDBase.set_redis_client(redis_client)
    JDBase.set_result_save_path(outlog)
    JDBase.set_useragent(fetch_util.get_pc_useragent())

    # Queue entries arrive as tuples whose first element is the (byte) id;
    # normalise it to str before handing it to the crawler.
    raw_id = queue.get()[0]
    task_id = fetch_util.byte_to_str(raw_id)

    JDBase.set_request_path(task_id)
    JDBase.execute()

    fetch_util.print_log('process  ' + JDBase.getshowlog() + '\t\n')

    # Pick the flag writer by outcome and record this task id.
    recorder = JDBase.save_succeed_log if JDBase.get_result() else JDBase.save_failed_log
    recorder(task_id)

    # Throttle before the caller schedules the next task.
    JDBase.sleep(start, end)
예제 #2
0
파일: jdutil.py 프로젝트: lancong/Spiders
def jdholder(tasks, JDBase, succeedlog, failedlog, outlog, cookie=None, start=10, end=40):
    """Crawl every task in *tasks* with *JDBase*, recording each outcome.

    For each task the crawler gets a fresh PC user agent and the task's
    request path, is executed, and a per-task success/failure flag is saved
    (paths *succeedlog* / *failedlog*); results go to *outlog*.  Between
    tasks ``JDBase.sleep(start, end)`` is called to throttle requests.
    """
    jd = JDBase

    # Only install a cookie when one was actually supplied — the other
    # jdholder2 variants guard the same way; previously set_cookie(None)
    # was called unconditionally when no cookie was passed.
    if cookie:
        jd.set_cookie(cookie)
    jd.set_succeed_log_path(succeedlog)
    jd.set_failed_log_path(failedlog)
    jd.set_result_save_path(outlog)

    taskslen = len(tasks)

    # enumerate() replaces the hand-maintained counter.
    for count, task in enumerate(tasks, 1):
        jd.set_useragent(fetch_util.get_pc_useragent())
        jd.set_request_path(task)
        jd.execute()

        fetch_util.print_log('process [' + str(count) + '/' + str(taskslen) + ']  ' + jd.getshowlog() + '\t\n')

        # Record the success/failure flag for this task.
        if jd.get_result():
            jd.save_succeed_log(task)
        else:
            jd.save_failed_log(task)

        # Throttle before the next request.
        jd.sleep(start, end)
예제 #3
0
파일: jdmain.py 프로젝트: lancong/Spiders
def jdholder2(task, JDBase, succeedlog, failedlog, outlog, cookie=None, start=10, end=40):
    """Run a single crawl of *task* through *JDBase* and record the outcome.

    Configures the crawler's log paths, optional *cookie* and a fresh PC
    user agent, executes the request for *task*, saves a success or failure
    flag for it, then calls ``JDBase.sleep(start, end)``.
    """
    if cookie:
        JDBase.set_cookie(cookie)

    # Point the crawler at its log sinks.
    JDBase.set_succeed_log_path(succeedlog)
    JDBase.set_failed_log_path(failedlog)
    JDBase.set_result_save_path(outlog)

    # Fresh desktop user agent for this request.
    JDBase.set_useragent(fetch_util.get_pc_useragent())
    JDBase.set_request_path(task)
    JDBase.execute()

    fetch_util.print_log('process  ' + JDBase.getshowlog() + '\t\n')

    # Record success or failure for this task.
    if JDBase.get_result():
        JDBase.save_succeed_log(task)
    else:
        JDBase.save_failed_log(task)

    # Throttle before the caller proceeds.
    JDBase.sleep(start, end)
예제 #4
0
        self.save_result(currentresult + price)
        fetch_util.print_log(currentresult + price)
        self.set_print_log(requesturl + ' succeed')
        self.set_result(True)

    # Execute the crawl.
    def execute(self):
        """Run the crawl step: delegates to self.parse_html_source()."""
        self.parse_html_source()


# Ad-hoc smoke test: fetch the JD bulk-price endpoint and dump the raw response.
if __name__ == '__main__':
    htmltext = fetch_util.urlrequest(
        "http://p.3.cn/prices/mgets?skuIds=J_1000017,J_&type=1", None, None,
        fetch_util.get_pc_useragent(), None, None, 10)
    # Print the payload once — it was previously printed twice, the second
    # copy left over between blocks of commented-out exploration code.
    print(htmltext)
예제 #5
0
# -*- coding: utf-8 -*-

from jumei.jumeiurlstartwithnum import JumeiUrlStartWithNum
import fetch_util
from bs4 import BeautifulSoup

if __name__ == '__main__':

    # Log file locations for this run.
    dirpath = '/Users/Lan/TestDir/output/'
    succeed_log = dirpath + 'sc.log'
    failed_log = dirpath + 'fl.log'
    result_log = dirpath + 'rs.log'

    # Configure a crawler for a single numeric Jumei product id and run it.
    jumei = JumeiUrlStartWithNum()
    jumei.setsucceedlog(succeed_log)
    jumei.setfailedlog(failed_log)
    jumei.setshowlog(result_log)
    jumei.setcookie(None)
    jumei.setua(fetch_util.get_pc_useragent())
    jumei.setrequestpath("847191")

    jumei.findsource()
예제 #6
0
파일: main.py 프로젝트: lancong/Spiders
 def gethtml(self):
     """Fetch self.url with a PC user agent and parse it into self.html.

     Side effect: sets self.html to a BeautifulSoup tree built with the
     stdlib 'html.parser' backend.
     """
     content = fetch_util.urlrequest(self.url, None, None, useragent=fetch_util.get_pc_useragent())
     self.html = BeautifulSoup(content, 'html.parser')