# coding: utf-8
"""Bootstrap: configure the main/thread loggers, seed a process-shared
currency-rate table, and start the background rate-refresher thread."""
import os
import time
import threading
import json
from multiprocessing import Manager

from logger import MyLog
from common import updateCurrencyRate
from const import *

# Project root: two directory levels up from this file.
curpath = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
logpath = os.path.join(curpath, "logger")

main_logger = MyLog(loggerName='main', nameTail='main', logPath=logpath, debug=True)
thread_logger = MyLog(loggerName='thread', nameTail='thread', logPath=logpath, debug=True)

# Process-shared dict seeded from the static CURRENCY_RATES table
# (from const); the daemon thread below keeps it up to date.
manager = Manager()
MCURRENCY_RATES = manager.dict(CURRENCY_RATES)

mthread = threading.Thread(target=updateCurrencyRate,
                           args=(CURRENCY_REQ_URL, MCURRENCY_RATES))
# Daemon thread so it never blocks interpreter exit.  The `daemon`
# attribute replaces the deprecated setDaemon() and works on both
# Python 2.6+ and Python 3.
mthread.daemon = True
mthread.start()
# print-as-function parses under both Python 2 and Python 3
# (the original `print MCURRENCY_RATES` statement is Python-2-only).
print(MCURRENCY_RATES)
asin_num, rank = parse_html(asin_str, list_html) # asin_num == 0,表明该页没有商品数据 if asin_num == 0: save_rank(asin_str, keyword_str, None) break if rank is None: # 当页不存在该asin时请求下一页 current_page += 1 if current_page == MAX_SPIDER_PAGE: # 如果到了第20页还查不到该asin,则排名设置为300 save_rank(asin_str, keyword_str, MAX_RANK) break time.sleep(WAIT_REQUEST_TIME) continue # 每一页的asin数量*页数 + 当前页面的rank rank = (current_page - 1) * asin_num + rank mylog.info("爬取到的排名:%d" % rank) save_rank(asin_str, keyword_str, rank) break time.sleep(WAIT_REQUEST_TIME) mylog.info("当前正在爬取的是列表页rank,等待下一轮爬取,间隔1h......") t1 = Timer(SPIDER_INTERVAL, main) t1.start() if __name__ == '__main__': mylog = MyLog() main()