def __init__(self, db_server, api_server, proxy_server, client_uid, similar_min=15):
    """Wire up the worker with its backing pools.

    Args:
        db_server: database host handed through to the pools.
        api_server: API endpoint shared by the pools.
        proxy_server: proxy-pool service address.
        client_uid: uid of the client account driving requests.
        similar_min: minimum similarity threshold (default 15).
    """
    # Plain configuration first, then the pool objects.
    self.api_server = api_server
    self.client_uid = client_uid
    self.similar_min = similar_min
    # Accounts and users need db/api/proxy; records only db/api;
    # the proxy pool talks straight to the proxy service.
    self.proxy_pool = ProxyPool(proxy_server)
    self.account_pool = AccountPool(db_server, api_server, proxy_server)
    self.user_pool = UserPool(db_server, api_server, proxy_server)
    self.record_pool = RecordPool(db_server, api_server)
def __init__(self, args):
    """Build the proxy-pool service from parsed CLI options (``args``)."""
    self.args = args
    # HTTPS requests retry up to 3 times via a mounted adapter.
    http_session = requests.Session()
    http_session.mount('https://', HTTPAdapter(max_retries=Retry(total=3)))
    # Storage backend is selected by the --db CLI option.
    backend = DB_CLIENTS[args.db](config.DB_NAME)
    # Timestamped log file name, e.g. proxy_pool_20240101_120000.
    self.logger = ColorfulLog(
        LOG_LEVELS[args.level],
        log_dir=config.LOG_PATH,
        log_name=time.strftime('proxy_pool_%Y%m%d_%H%M%S'),
    )
    self.proxy_pool_client = Client(caller='proxy_pool', host=args.host, port=args.port)
    self.proxy_pool = ProxyPool(backend, http_session, self.logger, self.proxy_pool_client)
def __init__(self, db_server, api_server, proxy_server, db_port=27017):
    """Initialise the account pool: connect storage, log all accounts in,
    and start the background refill thread.

    Args:
        db_server: MongoDB host holding the ``net_ease.account`` collection.
        api_server: API endpoint used by the pool.
        proxy_server: proxy-pool service address.
        db_port: MongoDB port; defaults to the previously hard-coded 27017.
    """
    self.print('Pending: Start initializing the account pool')
    self.api_server = api_server
    self.db_server = db_server
    # Session() is the documented constructor; session() is a legacy alias.
    self.session = requests.Session()
    self.db = pymongo.MongoClient(self.db_server, db_port).net_ease.account
    self.proxy_pool = ProxyPool(proxy_server)
    # Log in every stored account before accepting any work.
    self.login_accounts()
    # Background thread keeps the task queue topped up.
    self.refill_thread = threading.Thread(target=self.refill_tasks)
    self.refill_thread.start()
    self.print('Success: Finish initializing the account pool')
def __init__(self, db_server, api_server, proxy_server, db_port=27017):
    """Initialise the user pool backed by the ``net_ease.user`` collection.

    Args:
        db_server: MongoDB host.
        api_server: API endpoint used by the pool.
        proxy_server: proxy-pool service address.
        db_port: MongoDB port; defaults to the previously hard-coded 27017.
    """
    self.api_server = api_server
    self.db = pymongo.MongoClient(db_server, db_port).net_ease.user
    self.proxy_pool = ProxyPool(proxy_server)
import sys, os
from pprint import pprint
import pymysql
import random
from utils import year_generator
from daily import *
from proxy_pool import ProxyPool
from settings import *

# Module-level proxy pool, created at import time.
pp = ProxyPool()

if __name__ == '__main__':
    # CLI: <stock_id> <use_proxy 0/1> <start_year>
    sid = int(sys.argv[1])
    use_proxy = int(sys.argv[2])
    start = int(sys.argv[3])
    for date in year_generator(start_year=start):
        print(date)
        daily_rows = crawl_daily_data(sid, date, use_proxy)
        if daily_rows:
            insert_daily_data(sid, daily_rows)
        bwibbw_rows = crawl_daily_bwibbw(sid, date, use_proxy)
        if bwibbw_rows:
            insert_bwibbw_data(sid, bwibbw_rows)
        # Random pause between dates to avoid hammering the site.
        time.sleep(random.randint(5, 15))
queue = MyPriorityQueue(maxsize=config.queue_num) await proxy.init_proxy_pool(config.local_num) producer = [] for idx, url in config.urls[place].items(): loop.create_task( douban_producer(queue, proxy, place, idx, url, 1, end_page, config.producer_time)) consumer = [ loop.create_task(douban_consumer(queue, proxy, i, config.consumer_num)) for i in range(config.consumer_num) ] await asyncio.wait(consumer + producer) if __name__ == "__main__": proxy = ProxyPool() event_loop = asyncio.get_event_loop() print("请输入对应的数字选择初始化模式") print("1 全部抓取") print("2 选择地区进行抓取") flag = input() if flag == "1": print("你已选择 模式1 全部抓取 请输入抓取页数") end_page = input() event_loop.run_until_complete( model_one(event_loop, proxy, int(end_page))) else: print("你已选择 模式2 选择地区抓取 请输入对应数字选择抓取地区") place_map = { idx: place for idx, place in enumerate(config.urls.keys())
headers = { 'User-Agent': 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/61.0.3163.100 Chrome/61.0.3163.100 Safari/537.36', 'Referer': 'http://www.tse.com.tw/zh/page/trading/exchange/BWIBBU.html' } no_match_data_byte_string = b'\xe5\xbe\x88\xe6\x8a\xb1\xe6\xad\x89\xef\xbc\x8c\xe6\xb2\x92\xe6\x9c\x89\xe7\xac\xa6\xe5\x90\x88\xe6\xa2\x9d\xe4\xbb\xb6\xe7\x9a\x84\xe8\xb3\x87\xe6\x96\x99!'.decode( 'utf8') # Get Stock_id _get_sid = """ SELECT stock_id FROM stock_list """ # Global proxy queue proxy_pool = ProxyPool() def _get_twsec_data(twsec_url, headers=None, use_proxy=False): max_retry = 5 data = None while True: try: if use_proxy: ip, port, delay, count = proxy_pool.get() if not ip: print('no proxy can use') use_proxy = None continue proxies = { 'http': 'http://{}:{}'.format(ip, port),
from proxy_pool import ProxyPool

if __name__ == '__main__':
    # Fetch one proxy from the pool and show it.
    pool = ProxyPool()
    proxy = pool.getproxy()
    # print() call form works on both Python 2 and 3; the original
    # `print proxy` statement is a SyntaxError under Python 3.
    print(proxy)