Exemple #1
0
def jd_price(url):
    """Look up the price of a JD product page via the ``p.3.cn`` price endpoint.

    The SKU id is the text between the last ``/`` and the last ``.`` of
    ``url``. The endpoint is queried with ``url`` passed as the referer.

    Args:
        url: Product page URL whose last path segment is ``<sku>.<ext>``.

    Returns:
        The value of the ``p`` field of the first JSON object found in the
        response, or the string ``'-1.00'`` when the response is empty,
        contains no JSON object, cannot be parsed, or has no ``p`` field.
    """
    no_price = '-1.00'

    sku = url[url.rfind('/') + 1:url.rfind('.')]

    # The callback name carries a random number; the remaining query
    # parameters are fixed values.
    baseurl = (
        'http://p.3.cn/prices/mgets?callback=jQuery'
        + str(fetch_util.randnum(1000000, 8888888))
        + '&type=1&area=1_72_4137_0&pdtk=&pduid=531394193&pdpin=&pdbp=0&skuIds=J_'
        + str(sku)
    )

    payload = fetch_util.openurl2(baseurl, refererurl=url)
    if not payload:
        # Nothing came back (empty string or None): no price available.
        return no_price

    # The reply is JSONP wrapping a one-element array, e.g.
    # jQuery3493581([{"id":"J_10550439205","p":"79.00","m":"199.00","op":"106.00"}]);
    # so cut out the first {...} object and parse only that.
    obj_start = payload.find('{')
    if obj_start == -1:
        return no_price
    obj_end = payload.find('}', obj_start)
    if obj_end == -1:
        return no_price

    try:
        price_info = json.loads(payload[obj_start:obj_end + 1])
    except ValueError:
        # Malformed JSON (json.JSONDecodeError is a ValueError subclass).
        return no_price

    return price_info.get('p', no_price)
Exemple #2
0
def func(lists, maincategory, outfile):
    """Fetch details for each URL in ``lists`` and write one record per URL.

    For every URL a ``PageInfo`` is built and its app name, category and tag
    are read; a missing category or tag is recorded as ``'unknow'``. The
    record written to ``outfile`` has the form
    ``<url>\\t<maincategory>><appname>\\tc:<category>\\tt:<tag>``.

    Args:
        lists: Sequence of page URLs to process; nothing happens when it is
            empty or ``None``.
        maincategory: Main category label placed before the app name.
        outfile: Destination handed to ``fetch_util.write``.
    """
    if not lists or len(lists) <= 0:
        return

    total = len(lists)

    for position, currenturl in enumerate(lists, 1):
        details = PageInfo(currenturl)
        appname = details.getappname()
        category = details.getcategory() or 'unknow'
        tag = details.gettag() or 'unknow'

        # Progress log: "[current/total] url".
        fetch_util.print_log('[' + str(position) + '/' + str(total) + '] ' + currenturl)

        # Write the result record.
        record = currenturl + '\t' + maincategory + '>' + appname + '\tc:' + category + '\tt:' + tag
        fetch_util.write(record, outfile)

        # Sleep for a random few seconds before the next URL.
        sleep(fetch_util.randnum(10, 40))
Exemple #3
0
 def sleep(self, start, end):
     """Pause for a random duration chosen by ``fetch_util.randnum(start, end)``.

     The inner ``sleep`` call resolves to the name ``sleep`` from the
     enclosing module scope, not to this method (presumably ``time.sleep``;
     confirm against the file's imports).
     """
     delay = fetch_util.randnum(start, end)
     sleep(delay)
Exemple #4
0
def main():
    """Crawl the configured Wandoujia category pages and record the apps found.

    Steps:
      1. Collect the URLs of each configured category via ``MainCategoryUrls``
         and combine them with ``fetch_util.liststolist``.
      2. For each resulting URL, list its page URLs via ``ParentPage``.
      3. For each page URL, read the main category and the URLs on that page
         via ``ParentPageUrl``, split those URLs with
         ``fetch_util.task_dispatch`` and run ``func`` on each piece in its
         own thread, joining all threads before moving on.
      4. After each URL from step 1, write a separator line, log completion
         and pause for a random interval.
    """
    # Output file location.
    # NOTE(review): both values are hard-coded; an earlier revision hinted at
    # sourcing them from ``wandoujiaconfig``.
    outfile = '/Users/Lan/TestDir/out/wandoujia.txt'
    # Main category URLs to crawl.
    specurls = ['http://www.wandoujia.com/category/396']

    fetch_util.print_log('update request urls ...')

    allurls = [MainCategoryUrls(specurl).geturls() for specurl in specurls]
    urls = fetch_util.liststolist(allurls)

    fetch_util.print_log('update request urls finished, len: ' + str(len(urls)))

    for url in urls:
        requesturls = ParentPage(url=url).getpageurls()

        if requesturls and len(requesturls) > 0:
            for requesturl in requesturls:
                page = ParentPageUrl(requesturl)

                # Main category of the current page, with a fallback label.
                maincategory = page.getcategory() or 'unknow'

                # URLs that can be requested from the current page.
                currenturls = page.getcurpageurls()
                if not currenturls or len(currenturls) <= 0:
                    continue

                # One thread per piece returned by task_dispatch
                # (the meaning of the 10 is defined by that helper).
                workers = []
                for batch in fetch_util.task_dispatch(currenturls, 10):
                    worker = threading.Thread(target=func, args=(batch, maincategory, outfile))
                    worker.start()
                    workers.append(worker)

                # Wait for every thread of this page before continuing.
                for worker in workers:
                    worker.join()

        # Mark the end of this URL's results in the output file.
        fetch_util.write('\r\n------ i am line -----\r\n', outfile)
        fetch_util.print_log("has finish: " + url)

        # Random pause before the next URL.
        sleep(fetch_util.randnum(10, 30))