def test_client(setup):
    """Exercise a spot -> request -> free -> validate round trip on the client.

    Checks, in order, that:
      * a proxy registered with ``spot_proxy`` is handed out by ``req_proxy``;
      * a second ``req_proxy`` for the same URL returns the empty string;
      * after ``free_proxy``, ``req_proxy_for_validate`` returns that proxy.

    ``setup`` is unused in the body; presumably a pytest fixture that prepares
    the proxy-pool service -- TODO confirm against the fixture definition.
    """
    client = ProxyPoolClient()
    client.open()
    try:
        client.spot_proxy('127.0.0.1', 8080, 'China')

        # First request hands out the only proxy in the pool.
        p = client.req_proxy('http://www.baidu.com')
        assert p == "http://127.0.0.1:8080"

        # Second request for the same URL gets nothing back.
        p = client.req_proxy('http://www.baidu.com')
        assert p == ""

        client.free_proxy("127.0.0.1:8080", 0.1)

        p = client.req_proxy_for_validate()
        assert p == "http://127.0.0.1:8080"
    finally:
        # Close even when an assertion fails so the connection is not leaked.
        client.close()
def spider_page(page_url, parse):
    """Fetch one proxy-list page and register every working proxy found on it.

    The page is downloaded with a random user agent, handed to ``parse``, and
    each candidate returned is validated with ``validate_proxy``. Candidates
    that fail validation, or whose response time exceeds
    ``settings.PROXY_MAX_DELAY``, are skipped. Remaining candidates that the
    pool does not already know are reported via ``client.spot_proxy`` together
    with the country returned by ``utility.get_ip_country``.

    Args:
        page_url: URL of the listing page to download.
        parse: Callable that receives the raw response body (``r.content``)
            and returns an iterable of ``(ip, port)`` pairs. ``port`` is
            logged with ``%d``, so it is presumably an int -- TODO confirm.

    Returns:
        None. Failures are logged and end processing early; nothing is raised
        for download, connection, or pool-call errors.
    """
    headers = {'user-agent': utility.random_ua()}
    try:
        r = requests.get(page_url, headers=headers, timeout=30)
    except Exception:
        # `Exception` rather than a bare except, so Ctrl-C / SystemExit
        # still propagate.
        logger.traceback()
        return

    if r.status_code != 200:
        logger.error("get list page failed, url = %s, status_code = %d", page_url, r.status_code)
        return

    client = ProxyPoolClient()
    try:
        client.open()
        logger.debug_fun("connect ok")
    except Thrift.TException:
        logger.traceback()
        logger.debug_fun("connect failed, quit")
        return

    try:
        logger.debug_fun("get list page ok, url = %s", page_url)
        for ip, port in parse(r.content):
            logger.debug_fun("check proxy, ip = %s, port = %d", ip, port)
            proxy_url = Proxy.make_proxy_url(ip, port)

            ret, resp_second = validate_proxy(proxy_url)
            if not ret or resp_second > settings.PROXY_MAX_DELAY:
                logger.debug_fun("check proxy failed, proxy_url = %s", proxy_url)
                continue
            logger.debug_fun("check proxy ok, proxy_url = %s", proxy_url)

            # Any failure below aborts the remaining candidates of this page
            # (kept from the original behavior).
            try:
                proxy_exists = client.has_proxy(proxy_url)
            except Exception:
                logger.traceback()
                logger.debug_fun("check proxy exists failed, proxy_url = %s", proxy_url)
                break

            if proxy_exists:
                logger.debug_fun("proxy exists, proxy_url = %s", proxy_url)
                continue

            try:
                country = utility.get_ip_country(ip)
            except Exception:
                logger.traceback()
                logger.debug_fun("get country failed, proxy_url = %s", proxy_url)
                break

            try:
                client.spot_proxy(ip, port, country)
            except Exception:
                logger.traceback()
                logger.debug_fun("spot new proxy failed, ip = %s, port = %d, country = %s", ip, port, country)
                break

            logger.debug_fun("spot new proxy, ip = %s, port = %d, country = %s", ip, port, country)
    finally:
        # The client was opened successfully above; always release it, even
        # when `parse` or an unexpected error escapes the loop.
        client.close()
def validate_all_proxies():
    """Re-validate proxies handed out by the pool until none are left.

    Repeatedly asks the pool for a proxy via ``req_proxy_for_validate``,
    checks it with ``validate_proxy`` and reports the result back through
    ``update_proxy_status``. The loop stops when the pool returns the empty
    string.

    Returns:
        None. Connection and validation errors are logged to
        ``validator_logger`` and end the run early; nothing is raised for them.
    """
    client = ProxyPoolClient()
    try:
        client.open()
        validator_logger.debug_fun("connect ok")
    except Exception:
        # `Exception` rather than a bare except, so Ctrl-C / SystemExit
        # still propagate.
        validator_logger.traceback()
        validator_logger.debug_fun("connect failed, quit")
        return

    try:
        while True:
            p = client.req_proxy_for_validate()
            if p == "":
                # Empty string means the pool has nothing more to validate.
                break
            validator_logger.debug("start validate %s", p)
            ok, elapsed = validate_proxy(p)
            validator_logger.debug("validate ok, ok = %s, elapsed = %f", ok, elapsed)
            client.update_proxy_status(p, ok)
    except Exception:
        # Log to the validator's own logger, like every other message here.
        validator_logger.traceback()
        return
    finally:
        # Release the connection on both the normal and the error path.
        client.close()

    validator_logger.debug("validate all finished")