# Example #1
# coding: utf-8

import os
import time
import threading
import json
from multiprocessing import Manager
from logger import MyLog
from common import updateCurrencyRate
from const import *

# Resolve the project root (two levels above this file) and put log files
# under <root>/logger.
curpath = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
logpath = os.path.join(curpath, "logger")
main_logger = MyLog(loggerName='main',
                    nameTail='main',
                    logPath=logpath,
                    debug=True)
thread_logger = MyLog(loggerName='thread',
                      nameTail='thread',
                      logPath=logpath,
                      debug=True)

# Wrap the static CURRENCY_RATES table in a Manager dict so the background
# updater and any worker processes see the same, live mapping.
manager = Manager()
MCURRENCY_RATES = manager.dict(CURRENCY_RATES)

# Refresh the rates in a background thread; mark it daemonic so it does not
# keep the interpreter alive on exit.
mthread = threading.Thread(target=updateCurrencyRate,
                           args=(CURRENCY_REQ_URL, MCURRENCY_RATES))
# setDaemon() is deprecated (removed usage since Python 3.10); the attribute
# form works on Python 2.6+ and all Python 3 versions.
mthread.daemon = True
mthread.start()

# print() with a single argument is valid in both Python 2 and Python 3,
# unlike the original print statement which is Python-2-only.
print(MCURRENCY_RATES)
# Example #2
            asin_num, rank = parse_html(asin_str, list_html)
            # asin_num == 0 means this page contains no product data at all
            if asin_num == 0:
                save_rank(asin_str, keyword_str, None)
                break

            if rank is None:
                # The ASIN is not on the current page — move on to the next one
                current_page += 1
                if current_page == MAX_SPIDER_PAGE:
                    # Still not found by page 20 (MAX_SPIDER_PAGE): record the
                    # sentinel rank 300 (MAX_RANK) instead of a real position
                    save_rank(asin_str, keyword_str, MAX_RANK)
                    break
                time.sleep(WAIT_REQUEST_TIME)
                continue
            # Overall rank = ASINs per page * number of completed pages
            #              + position on the current page
            rank = (current_page - 1) * asin_num + rank
            mylog.info("爬取到的排名:%d" % rank)
            save_rank(asin_str, keyword_str, rank)
            break
        time.sleep(WAIT_REQUEST_TIME)

    mylog.info("当前正在爬取的是列表页rank,等待下一轮爬取,间隔1h......")
    # Self-rescheduling loop: fire main() again after SPIDER_INTERVAL seconds
    t1 = Timer(SPIDER_INTERVAL, main)
    t1.start()


if __name__ == '__main__':
    # Entry point: build a default logger, then start the first crawl round;
    # main() reschedules itself via a Timer for every subsequent round.
    mylog = MyLog()
    main()