コード例 #1
0
import requests
from bs4 import BeautifulSoup

import proxy_switcher
from api_helper import CRAWLER_HEADERS
from token_bucket import Bucket
from utils.jsdati import JsdatiApi
from config.config import V2EX_USERNAME, V2EX_PASSWORD, JSDATI_USERNAME, JSDATI_PASSWORD
from utils.notification import wechat_notify

# URLs on the V2EX web site used by this module.
V2EX_INDEX_URL = 'https://www.v2ex.com'
V2EX_SIGNIN_URL = 'https://www.v2ex.com/signin'
# Template: the `{topic_id}` placeholder must be filled in before use.
V2EX_TOPIC_WEB_URL = 'https://www.v2ex.com/t/{topic_id}'

# Module-wide token bucket that `consume_token` polls before each decorated call.
# NOTE(review): the units of `rate` and `burst` are defined by token_bucket.Bucket — confirm there.
bucket = Bucket(rate=0.3, burst=1)

# Client created once at import time from the Jsdati credentials in config.
# NOTE(review): presumably a captcha-recognition service used during sign-in — confirm.
dmapi = JsdatiApi(JSDATI_USERNAME, JSDATI_PASSWORD)


def consume_token(func):
    """Decorate *func* so every call first waits on the module-level ``bucket``.

    The returned callable polls ``bucket.get()`` and sleeps half a second
    between polls for as long as the reported value is below 1.  It then
    calls ``bucket.desc()`` once and forwards all arguments to *func*,
    returning whatever *func* returns.

    NOTE(review): ``get()`` presumably reports the tokens currently available
    and ``desc()`` presumably removes one; they are two separate calls, so
    confirm against ``token_bucket.Bucket`` whether concurrent callers are safe.
    """

    @functools.wraps(func)
    def throttled(*args, **kwargs):
        while True:
            # Leave the polling loop as soon as the bucket no longer reports < 1.
            if not bucket.get() < 1:
                break
            time.sleep(0.5)
        bucket.desc()
        return func(*args, **kwargs)

    return throttled

コード例 #2
0
 def __init__(self, rate=0, burst=0):
     """Create the lock, the three task queues and the token bucket.

     ``rate`` and ``burst`` are passed through unchanged as keyword
     arguments to ``Bucket``; both default to 0.
     """
     self.mutex = threading.Lock()
     # Three independent PriorityTaskQueue instances, built in attribute order.
     (self.priority_queue,
      self.time_queue,
      self.processing) = (PriorityTaskQueue() for _ in range(3))
     self.bucket = Bucket(rate=rate, burst=burst)
コード例 #3
0
ファイル: api_helper.py プロジェクト: zhileichen/v2ex-crawler
}

# V2EX API
# Site root. NOTE(review): presumably the *_PATH constants below are appended
# to this URL — the code that joins them is not visible here.
V2EX_SITE_URL = 'https://www.v2ex.com'

# JSON endpoint paths; trailing comments list the query parameters each accepts.
STATS_API_PATH = '/api/site/stats.json'
ALL_NODES_PATH = '/api/nodes/all.json'
NODE_INFO_PATH = '/api/nodes/show.json'  # param: `id` or `name`
LATEST_TOPICS_PATH = '/api/topics/latest.json'
TOPIC_INFO_PATH = '/api/topics/show.json'  # param: `id`
REPLIES_OF_TOPIC_PATH = '/api/replies/show.json'  # param: `topic_id`
MEMBER_INFO_PATH = '/api/members/show.json'  # param: `id` or `username`

# NOTE(review): the name suggests a cap of 120 API requests per hour — confirm
# against the V2EX API documentation.
API_RATE_LIMIT_ONE_HOUR = 120

# Module-wide token bucket that `consume_token` polls before each decorated call.
# NOTE(review): the units of `rate` and `burst` are defined by token_bucket.Bucket — confirm there.
bucket = Bucket(rate=0.5, burst=1)


def consume_token(func):
    """Return a wrapper around *func* that waits on the module-level ``bucket``.

    Before each call the wrapper reads ``bucket.get()``; while that value is
    below 1 it sleeps 0.5 seconds and reads again.  Once the loop ends it
    calls ``bucket.desc()`` exactly once and then invokes *func* with the
    original positional and keyword arguments, returning its result.

    NOTE(review): presumably ``get()`` is the current token count and
    ``desc()`` consumes one token — verify against ``Bucket``.
    """

    def _take_token():
        # Block (by polling) until the bucket reports at least one unit.
        available = bucket.get()
        while available < 1:
            time.sleep(0.5)
            available = bucket.get()
        bucket.desc()

    @functools.wraps(func)
    def rate_limited(*args, **kwargs):
        _take_token()
        return func(*args, **kwargs)

    return rate_limited


class APIHelper(object):
    """API service with traffic flow controller"""