import requests
from bs4 import BeautifulSoup

import proxy_switcher
from api_helper import CRAWLER_HEADERS
from token_bucket import Bucket
from utils.jsdati import JsdatiApi
from config.config import V2EX_USERNAME, V2EX_PASSWORD, JSDATI_USERNAME, JSDATI_PASSWORD
from utils.notification import wechat_notify

# NOTE(review): `functools` and `time` are used by `consume_token` below but
# are not imported in the visible part of this module - confirm they are
# imported elsewhere in the file.

# Web (non-API) endpoints of the V2EX site.
V2EX_INDEX_URL = 'https://www.v2ex.com'
V2EX_SIGNIN_URL = 'https://www.v2ex.com/signin'
V2EX_TOPIC_WEB_URL = 'https://www.v2ex.com/t/{topic_id}'  # fill with .format(topic_id=...)

# Module-wide token bucket shared by every function decorated with
# `consume_token`.
bucket = Bucket(rate=0.3, burst=1)

# Client built from the JSDATI credentials in the project config.
dmapi = JsdatiApi(JSDATI_USERNAME, JSDATI_PASSWORD)


def consume_token(func):
    """Decorate *func* so each call first waits on the module-level bucket.

    The returned callable polls ``bucket.get()`` every 0.5 seconds until it
    reports at least 1, then calls ``bucket.desc()`` (presumably taking one
    token - confirm against ``token_bucket.Bucket``) and finally forwards all
    positional and keyword arguments to *func*, returning its result.
    """

    def throttled(*call_args, **call_kwargs):
        # Block the caller until the bucket reports a whole token.
        while True:
            if not (bucket.get() < 1):
                break
            time.sleep(0.5)
        bucket.desc()
        return func(*call_args, **call_kwargs)

    # Copy the name, docstring, etc. of *func* onto the throttled wrapper.
    return functools.wraps(func)(throttled)
def __init__(self, rate=0, burst=0):
    """Create the lock, the three task queues and the token bucket.

    Args:
        rate: passed through to ``Bucket`` as its ``rate`` argument.
        burst: passed through to ``Bucket`` as its ``burst`` argument.
    """
    self.mutex = threading.Lock()

    # Each attribute gets its own fresh PriorityTaskQueue instance, created
    # in this order.
    for queue_attr in ('priority_queue', 'time_queue', 'processing'):
        setattr(self, queue_attr, PriorityTaskQueue())

    self.bucket = Bucket(rate=rate, burst=burst)
} # V2EX API V2EX_SITE_URL = 'https://www.v2ex.com' STATS_API_PATH = '/api/site/stats.json' ALL_NODES_PATH = '/api/nodes/all.json' NODE_INFO_PATH = '/api/nodes/show.json' # param: `id` or `name` LATEST_TOPICS_PATH = '/api/topics/latest.json' TOPIC_INFO_PATH = '/api/topics/show.json' # param: `id` REPLIES_OF_TOPIC_PATH = '/api/replies/show.json' # param: `topic_id` MEMBER_INFO_PATH = '/api/members/show.json' # param: `id` or `username` API_RATE_LIMIT_ONE_HOUR = 120 bucket = Bucket(rate=0.5, burst=1) def consume_token(func): @functools.wraps(func) def wrapper(*args, **kwargs): while bucket.get() < 1: time.sleep(0.5) bucket.desc() return func(*args, **kwargs) return wrapper class APIHelper(object): """API service with traffic flow controller"""