예제 #1
0
    def __init__(self, db_server, api_server, proxy_server, client_uid, similar_min=15):
        """Store the client settings and build the four helper pools.

        Args:
            db_server: Forwarded to AccountPool, UserPool and RecordPool.
            api_server: Kept on the instance and forwarded to AccountPool,
                UserPool and RecordPool.
            proxy_server: Forwarded to AccountPool, UserPool and ProxyPool.
            client_uid: Kept on the instance as ``client_uid``.
            similar_min: Kept on the instance as ``similar_min`` (default 15).
        """
        # Plain values first; none of them depend on the pools below.
        self.client_uid = client_uid
        self.similar_min = similar_min
        self.api_server = api_server

        # The account and user pools take the same three endpoints.
        endpoints = (db_server, api_server, proxy_server)
        self.account_pool = AccountPool(*endpoints)
        self.user_pool = UserPool(*endpoints)

        self.proxy_pool = ProxyPool(proxy_server)
        self.record_pool = RecordPool(db_server, api_server)
예제 #2
0
 def __init__(self, args):
     """Build the logger, the proxy-pool client and the proxy pool from ``args``.

     Reads ``args.db``, ``args.level``, ``args.host`` and ``args.port``.
     """
     self.args = args

     # Session whose 'https://' prefix is served by an adapter configured
     # with Retry(total=3).
     http_session = requests.Session()
     retrying_adapter = HTTPAdapter(max_retries=Retry(total=3))
     http_session.mount('https://', retrying_adapter)

     # DB_CLIENTS maps ``args.db`` to a callable that is given the DB name.
     db_client_factory = DB_CLIENTS[args.db]
     db_client = db_client_factory(config.DB_NAME)

     # The log name embeds the local time at which this object was created.
     stamped_log_name = time.strftime('proxy_pool_%Y%m%d_%H%M%S')
     self.logger = ColorfulLog(
         LOG_LEVELS[args.level],
         log_dir=config.LOG_PATH,
         log_name=stamped_log_name,
     )

     pool_client = Client(caller='proxy_pool', host=args.host, port=args.port)
     self.proxy_pool_client = pool_client
     self.proxy_pool = ProxyPool(db_client, http_session, self.logger, pool_client)
예제 #3
0
    def __init__(self, db_server, api_server, proxy_server):
        """Set up the account pool and start its refill thread.

        Args:
            db_server: MongoDB host; the client connects to it on port 27017.
            api_server: Kept on the instance as ``api_server``.
            proxy_server: Passed to the ProxyPool constructor.
        """
        self.print('Pending: Start initializing the account pool')

        self.api_server, self.db_server = api_server, db_server
        self.session = requests.session()

        # Handle on the ``account`` collection of the ``net_ease`` database.
        mongo_client = pymongo.MongoClient(self.db_server, 27017)
        self.db = mongo_client.net_ease.account

        self.proxy_pool = ProxyPool(proxy_server)
        self.login_accounts()

        # ``refill_tasks`` runs on its own thread, started right away; the
        # attribute is assigned before the thread starts, as in the original.
        refill_worker = threading.Thread(target=self.refill_tasks)
        self.refill_thread = refill_worker
        refill_worker.start()

        self.print('Success: Finish initializing the account pool')
예제 #4
0
 def __init__(self, db_server, api_server, proxy_server):
     """Keep the API server and open the user collection and proxy pool.

     Args:
         db_server: MongoDB host; the client connects to it on port 27017.
         api_server: Kept on the instance as ``api_server``.
         proxy_server: Passed to the ProxyPool constructor.
     """
     self.api_server = api_server

     # Handle on the ``user`` collection of the ``net_ease`` database.
     mongo_client = pymongo.MongoClient(db_server, 27017)
     self.db = mongo_client.net_ease.user

     self.proxy_pool = ProxyPool(proxy_server)
예제 #5
0
import sys, os
from pprint import pprint
import pymysql
import random

from utils import year_generator
from daily import *
from proxy_pool import ProxyPool
from settings import *

# NOTE(review): `pp` is not referenced anywhere in the visible script; it is
# kept because constructing ProxyPool may have side effects - confirm before
# removing.
pp = ProxyPool()

if __name__ == '__main__':
    # Imported explicitly: the loop below calls time.sleep(), which previously
    # only worked if one of the star imports happened to expose `time`.
    import time

    # Three positional arguments are required; fail with a usage line instead
    # of a bare IndexError when any is missing.
    if len(sys.argv) < 4:
        sys.exit('usage: {} <sid> <use_proxy> <start_year>'.format(sys.argv[0]))

    sid = int(sys.argv[1])
    use_proxy = int(sys.argv[2])
    start = int(sys.argv[3])

    for date in year_generator(start_year=start):
        print(date)

        # Each crawl result is stored only when it is truthy.
        dd = crawl_daily_data(sid, date, use_proxy)
        if dd:
            insert_daily_data(sid, dd)

        bd = crawl_daily_bwibbw(sid, date, use_proxy)
        if bd:
            insert_bwibbw_data(sid, bd)

        # Random 5-15 second pause between dates.
        time.sleep(random.randint(5, 15))
예제 #6
0
    queue = MyPriorityQueue(maxsize=config.queue_num)
    await proxy.init_proxy_pool(config.local_num)
    producer = []
    for idx, url in config.urls[place].items():
        loop.create_task(
            douban_producer(queue, proxy, place, idx, url, 1, end_page,
                            config.producer_time))
    consumer = [
        loop.create_task(douban_consumer(queue, proxy, i, config.consumer_num))
        for i in range(config.consumer_num)
    ]
    await asyncio.wait(consumer + producer)


if __name__ == "__main__":
    proxy = ProxyPool()
    event_loop = asyncio.get_event_loop()
    print("请输入对应的数字选择初始化模式")
    print("1 全部抓取")
    print("2 选择地区进行抓取")
    flag = input()
    if flag == "1":
        print("你已选择 模式1 全部抓取 请输入抓取页数")
        end_page = input()
        event_loop.run_until_complete(
            model_one(event_loop, proxy, int(end_page)))
    else:
        print("你已选择 模式2 选择地区抓取 请输入对应数字选择抓取地区")
        place_map = {
            idx: place
            for idx, place in enumerate(config.urls.keys())
예제 #7
0
# Default HTTP request headers.
headers = {
    # The value is the bare user-agent string. It previously began with the
    # literal text "User-Agent: ", which duplicated the header name inside
    # the header value.
    'User-Agent':
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/61.0.3163.100 Chrome/61.0.3163.100 Safari/537.36',
    'Referer': 'http://www.tse.com.tw/zh/page/trading/exchange/BWIBBU.html'
}

# UTF-8 bytes that decode to a Traditional Chinese sentence meaning
# "Sorry, no data matching the criteria!". Writing it as escaped bytes keeps
# this source file ASCII-only.
no_match_data_byte_string = b'\xe5\xbe\x88\xe6\x8a\xb1\xe6\xad\x89\xef\xbc\x8c\xe6\xb2\x92\xe6\x9c\x89\xe7\xac\xa6\xe5\x90\x88\xe6\xa2\x9d\xe4\xbb\xb6\xe7\x9a\x84\xe8\xb3\x87\xe6\x96\x99!'.decode(
    'utf8')
# SQL query selecting every stock_id from the stock_list table.
_get_sid = """
SELECT stock_id FROM stock_list
"""

# Module-level ProxyPool instance. `_get_twsec_data` calls its `.get()` and
# unpacks the result as (ip, port, delay, count).
proxy_pool = ProxyPool()


def _get_twsec_data(twsec_url, headers=None, use_proxy=False):
    max_retry = 5
    data = None
    while True:
        try:
            if use_proxy:
                ip, port, delay, count = proxy_pool.get()
                if not ip:
                    print('no proxy can use')
                    use_proxy = None
                    continue
                proxies = {
                    'http': 'http://{}:{}'.format(ip, port),
예제 #8
0
from proxy_pool import ProxyPool

if __name__ == '__main__':
    # Build a pool, fetch one proxy from it and print the result.
    pool = ProxyPool()
    proxy = pool.getproxy()
    # Parenthesized single-argument print runs on both Python 2 and Python 3;
    # the bare `print proxy` statement is a SyntaxError on Python 3.
    print(proxy)