def __init__(self):
    """Build the Flask app and register the proxy-pool HTTP endpoints.

    Registers ``/random/``, ``/proxies/`` and ``/disabled_domain/`` on
    ``self.app``. Each handler reads its arguments from the query string
    and delegates to ``self.mongo_pool``.
    """
    self.app = Flask(__name__)
    self.mongo_pool = MongoPool()

    @self.app.route('/random/')
    def random():
        """Return one proxy as ``ip:port``, prefixed by the protocol if given."""
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        chosen = self.mongo_pool.random_proxy(protocol, domain, count=PROXIES_DEFAULT_COUNT)
        address = '{}:{}'.format(chosen.ip, chosen.port)
        if not protocol:
            return address
        return '{}://{}'.format(protocol, address)

    @self.app.route('/proxies/')
    def proxies():
        """Return a JSON array of ``{"ip", "port"}`` objects."""
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        found = self.mongo_pool.get_proxies(protocol, domain, count=PROXIES_DEFAULT_COUNT)
        payload = [dict(ip=item.ip, port=item.port) for item in found]
        return json.dumps(payload)

    @self.app.route('/disabled_domain/')
    def disable_domain():
        """Mark ``domain`` as disabled for the proxy identified by ``ip``."""
        ip = request.args.get('ip')
        domain = request.args.get('domain')
        # Both query parameters are mandatory; report the first missing one.
        for value, message in ((ip, '填写ip'), (domain, '填写域名')):
            if value is None:
                return message
        self.mongo_pool.disabled_domain(ip, domain)
        return '成功设置{}禁止访问{}'.format(ip, domain)
def __init__(self):
    """Create the Flask app and register the proxy API routes.

    Registers ``/random``, ``/all`` and ``/disable_domain`` on ``self.app``.
    Every handler reads its filters from the query string and delegates to
    ``self.mongo_pool``.
    """
    self.app = Flask(__name__)
    self.mongo_pool = MongoPool()

    @self.app.route('/random')
    def random():
        """Return one proxy as ``[protocol://]ip:port``."""
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        proxy = self.mongo_pool.random_proxy(protocol, domain, count=API_COUNT)
        # The per-request debug print of the proxy object was removed: it
        # wrote to stdout on every call and carried no information for clients.
        if protocol:
            return '{}://{}:{}'.format(protocol, proxy.ip, proxy.port)
        return '{}:{}'.format(proxy.ip, proxy.port)

    # Flask derives the endpoint name from the function name, so the name
    # ``all`` is kept even though it shadows the builtin inside this scope.
    @self.app.route('/all')
    def all():
        """Return the matching proxies as a JSON array of attribute dicts."""
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        proxies = self.mongo_pool.get_proxies(protocol, domain, count=API_COUNT)
        return json.dumps([proxy.__dict__ for proxy in proxies])

    @self.app.route('/disable_domain')
    def disable_domain():
        """Disable ``domain`` for the proxy identified by ``ip``."""
        ip = request.args.get('ip')
        domain = request.args.get('domain')
        if ip is None:
            return "请提供ip参数"
        if domain is None:
            return "请传入域名"
        self.mongo_pool.disable_domain(ip, domain)
        return "{} 禁用域名{} 成功".format(ip, domain)
def __init__(self):
    """Set up the Flask web service and its three proxy endpoints."""
    # Flask application that serves the API.
    self.app = Flask(__name__)
    # MongoPool instance used by the handlers to talk to the database.
    self.mongo_pool = MongoPool()

    @self.app.route('/random')
    def random():
        """Return one randomly selected proxy.

        Query parameters ``protocol`` and ``domain`` are forwarded to
        ``MongoPool.random_proxy`` as filters. The response is
        ``protocol://ip:port`` when a protocol was given, else ``ip:port``.
        """
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        chosen = self.mongo_pool.random_proxy(protocol, domain, count=PROXIES_MAX_COUNT)
        host_port = '{}:{}'.format(chosen.ip, chosen.port)
        return '{}://{}'.format(protocol, host_port) if protocol else host_port

    @self.app.route('/proxies')
    def proxies():
        """Return several proxies, filtered by ``protocol`` and ``domain``."""
        protocol = request.args.get('protocol')  # e.g. http / https
        domain = request.args.get('domain')      # e.g. jd.com
        found = self.mongo_pool.get_proxies(protocol, domain, count=PROXIES_MAX_COUNT)
        # Proxy objects are not JSON serialisable; expose their attribute dicts.
        as_dicts = [item.__dict__ for item in found]
        return {"proxies": as_dicts}

    @self.app.route('/disable_domain')
    def disable_domain():
        """Record that the proxy ``ip`` must not be used for ``domain``."""
        ip = request.args.get('ip')
        domain = request.args.get('domain')
        for value, message in ((ip, '请提供ip参数'), (domain, '请提供域名domain参数')):
            if value is None:
                return message
        self.mongo_pool.disable_domain(ip, domain)
        return "{} 禁用域名 {} 成功".format(ip, domain)
def __init__(self):
    """Create the MongoPool object and the coroutine pool.

    The coroutine pool is used to run each spider task asynchronously.
    """
    # Construction order is unchanged: MongoPool first, then Pool.
    self.mongopool, self.coroutine_pool = MongoPool(), Pool()
def __init__(self):
    """Initialise the Flask web service and register the proxy endpoints."""
    # Flask application object.
    self.app = Flask(__name__)
    # MongoPool instance used to operate on the database.
    self.mongo_pool = MongoPool()

    @self.app.route('/random')
    def random():
        """Return one random proxy filtered by ``protocol`` and ``domain``.

        The lookup is made with ``nick_type=2``.
        """
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        chosen = self.mongo_pool.random_proxy(
            protocol, domain, count=PROXIES_MAX_COUNT, nick_type=2)
        host_port = f"{chosen.ip}:{chosen.port}"
        if not protocol:
            return host_port
        return f"{protocol}://{host_port}"

    @self.app.route('/proxies')
    def proxies():
        """Return several proxies as a JSON string (``nick_type=0``)."""
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        found = self.mongo_pool.get_proxies(
            protocol, domain, count=PROXIES_MAX_COUNT, nick_type=0)
        # Proxy objects are converted to their attribute dicts for serialisation.
        return json.dumps([item.__dict__ for item in found])

    @self.app.route('/disable_domain')
    def disable_domain():
        """Record that the proxy ``ip`` should no longer be served for ``domain``."""
        ip = request.args.get('ip')
        domain = request.args.get('domain')
        for value, message in ((ip, "请提供ip参数"), (domain, "请提供域名domain参数")):
            if value is None:
                return message
        self.mongo_pool.disable_domain(ip, domain)
        return "{}禁用域名{}成功".format(ip, domain)
def __init__(self, module_name='', spider_list=None):
    """Store optional overrides and create the database and coroutine pools.

    Args:
        module_name: when truthy, stored on the instance as ``module_name``.
        spider_list: when truthy, stored on the instance as ``spider_list``.
            The default is ``None`` rather than a shared mutable ``[]``;
            both are falsy, so the behaviour is the same.

    NOTE(review): when an argument is falsy the attribute is not assigned
    here. Presumably the class provides defaults; verify against the
    class body.
    """
    if module_name:
        self.module_name = module_name
    if spider_list:
        self.spider_list = spider_list
    self.mongo_pool = MongoPool()
    # Coroutine pool.
    self.coroutine_pool = Pool()
def __init__(self):
    """Initialise the Flask web service and register the proxy endpoints."""
    # Flask application object.
    self.app = Flask(__name__)
    # MongoPool instance used to operate on the database.
    self.mongo_pool = MongoPool()

    @self.app.route('/random')
    def random():
        """Return one random proxy.

        ``protocol`` and ``domain`` query parameters are passed on as
        filters. The answer is ``protocol://ip:port`` when a protocol was
        supplied, otherwise ``ip:port``.
        """
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        chosen = self.mongo_pool.random_proxy(protocol, domain, count=PROXIES_MAX_COUNT)
        host_port = '{}:{}'.format(chosen.ip, chosen.port)
        return '{}://{}'.format(protocol, host_port) if protocol else host_port

    @self.app.route('/proxies')
    def proxies():
        """Return several proxies, filtered by ``protocol`` and ``domain``, as JSON."""
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        found = self.mongo_pool.get_proxies(protocol, domain, count=PROXIES_MAX_COUNT)
        # Proxy objects cannot be serialised directly; use their attribute dicts.
        return json.dumps([item.__dict__ for item in found])

    @self.app.route('/disable_domain')
    def disable_domain():
        """Append ``domain`` to the unusable domains of the proxy ``ip``."""
        ip = request.args.get('ip')
        domain = request.args.get('domain')
        # NOTE(review): the two messages below look like typos for "请提供…";
        # they are user-facing strings and are deliberately left as they were.
        for value, message in ((ip, '情提供ip参数'), (domain, '情提供域名domain参数')):
            if value is None:
                return message
        self.mongo_pool.disable_domain(ip, domain)
        return '{}禁用域名{}成功'.format(ip, domain)
def __init__(self):
    """Initialise the Flask web service and register the proxy endpoints."""
    self.app = Flask(__name__)
    self.mongo_pool = MongoPool()

    def requested_protocol():
        """Read the protocol filter from the query string.

        The handlers used to look up the misspelled key ``protocal``, so
        the documented ``protocol`` parameter was silently ignored. The
        correct key is now preferred and the old spelling is still accepted
        so existing callers keep working.
        """
        args = request.args
        return args.get('protocol', args.get('protocal'))

    @self.app.route('/random')
    def random():
        """Return one random proxy.

        ``protocol`` and ``domain`` query parameters are passed on as
        filters. The answer is ``protocol://ip:port`` when a protocol was
        supplied, otherwise ``ip:port``.
        """
        protocol = requested_protocol()
        domain = request.args.get('domain')
        proxy = self.mongo_pool.random_proxy(protocol, domain, count=PROXIES_MAX_COUNT)
        if protocol:
            return '{}://{}:{}'.format(protocol, proxy.ip, proxy.port)
        return '{}:{}'.format(proxy.ip, proxy.port)

    @self.app.route('/proxies')
    def proxies():
        """Return several proxies, filtered by ``protocol`` and ``domain``, as JSON."""
        protocol = requested_protocol()
        domain = request.args.get('domain')
        proxies = self.mongo_pool.get_proxies(protocol, domain, count=PROXIES_MAX_COUNT)
        # Proxy objects are not JSON serialisable; use their attribute dicts.
        return json.dumps([proxy.__dict__ for proxy in proxies])

    @self.app.route('/disable_domain')
    def disable_domain():
        """Record that the proxy ``ip`` should no longer be served for ``domain``."""
        ip = request.args.get('ip')
        domain = request.args.get('domain')
        if ip is None:
            return '请求提供ip参数'
        if domain is None:
            return '请提供域名domain参数'
        self.mongo_pool.disable_domain(ip, domain)
        return "{} 禁用域名{} 成功".format(ip, domain)
def __init__(self):
    """Initialise the Flask web server and register the proxy endpoints."""
    self.app = Flask(__name__)
    # MongoPool instance used to operate on the database.
    self.mongo_pool = MongoPool()

    @self.app.route('/random')
    def random():
        """Return one random proxy.

        - ``protocol``: protocol type of the current request
        - ``domain``: domain of the current request
        """
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        chosen = self.mongo_pool.random_proxies(protocol, domain, count=PROXIES_MAX_COUNT)
        host_port = '{}:{}'.format(chosen.ip, chosen.port)
        if not protocol:
            return host_port
        return '{}://{}'.format(protocol, host_port)

    @self.app.route('/proxies')
    def proxies():
        """Return several proxies, filtered by ``protocol`` and ``domain``, as JSON."""
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        found = self.mongo_pool.get_proxies(protocol, domain, count=PROXIES_MAX_COUNT)
        # Proxy objects are not JSON serialisable; use their attribute dicts.
        return json.dumps([item.__dict__ for item in found])

    @self.app.route('/disable')
    def disable_domain():
        """Append ``domain`` to the unusable domains of the proxy ``ip``."""
        ip = request.args.get('ip')
        domain = request.args.get('domain')
        for value, message in ((ip, '请提供ip参数'), (domain, '请提供domain参数')):
            if value is None:
                return message
        self.mongo_pool.disable_domain(ip, domain)
        return "{}禁用域名{}成功".format(ip, domain)
def __init__(self):
    """Create the Flask app and register the single ``/random`` endpoint."""
    self.app = Flask(__name__)
    self.mongo_pool = MongoPool()

    @self.app.route('/random')
    def random():
        """Return the proxy from ``MongoPool.usable_proxy`` as ``[protocol://]ip:port``."""
        protocol = request.args.get('protocol')
        chosen = self.mongo_pool.usable_proxy()
        host_port = '{}:{}'.format(chosen.ip, chosen.port)
        if not protocol:
            return host_port
        return '{}://{}'.format(protocol, host_port)
def __init__(self):
    """Initialise the Flask web service and register the proxy endpoints."""
    # Flask application object.
    self.app = Flask(__name__)
    # MongoPool instance used to operate on the database.
    self.mongo_pool = MongoPool()

    @self.app.route('/random')
    def random():
        """Return one random proxy.

        ``protocol`` and ``domain`` query parameters are passed on as
        filters. The answer is ``protocol://ip:port`` when a protocol was
        supplied, otherwise ``ip:port``.
        """
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        # The debug prints of protocol/domain on every request were removed.
        proxy = self.mongo_pool.random_proxy(protocol, domain, count=PROXIES_MAX_COUNT)
        if protocol:
            return '{}://{}:{}'.format(protocol, proxy.ip, proxy.port)
        return '{}:{}'.format(proxy.ip, proxy.port)

    @self.app.route('/proxies')
    def proxies():
        """Return several proxies, filtered by ``protocol`` and ``domain``, as JSON."""
        # The protocol (http/https) used to be read from the wrong query key
        # ``proxies``, so ``?protocol=...`` had no effect. The correct key is
        # preferred and the old key is still honoured for existing callers.
        protocol = request.args.get('protocol', request.args.get('proxies'))
        # Domain, e.g. jd.com.
        domain = request.args.get('domain')
        proxies = self.mongo_pool.get_proxies(protocol, domain, count=PROXIES_MAX_COUNT)
        # Proxy objects are not JSON serialisable; use their attribute dicts.
        return json.dumps([proxy.__dict__ for proxy in proxies])

    @self.app.route('/disable_domain')
    def disable_domain():
        """Record that the proxy ``ip`` should no longer be served for ``domain``."""
        ip = request.args.get('ip')
        domain = request.args.get('domain')
        if ip is None:
            return "请提供ip参数"
        if domain is None:
            return '请提供域名domain参数'
        self.mongo_pool.disable_domain(ip, domain)
        return "{} 禁用域名 {} 成功".format(ip, domain)
def __init__(self, count):
    """Create the Flask app, the MongoPool and the HTTP endpoints.

    Args:
        count: stored as ``self.count`` and forwarded as ``count=`` to the
            MongoPool lookups made by ``/random`` and ``/proxies``.
    """
    self.count = count
    self.app = Flask(__name__, template_folder="../assets/templates")
    self.mongo_pool = MongoPool()

    @self.app.route('/')
    @self.app.route('/index')
    def index():
        """Render the ``index.html`` template."""
        return render_template("index.html")

    @self.app.route('/random')
    def random():
        """Return one random proxy as ``[protocol://]ip:port``."""
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        chosen = self.mongo_pool.random_proxy(protocol, domain, count=self.count)
        host_port = '{}:{}'.format(chosen.ip, chosen.port)
        return '{}://{}'.format(protocol, host_port) if protocol else host_port

    @self.app.route('/proxies')
    def proxies():
        """Return the matching proxies as a JSON array of attribute dicts."""
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        found = self.mongo_pool.get_proxies(protocol, domain, count=self.count)
        # Proxy objects -> attribute dicts -> JSON text.
        return json.dumps([item.__dict__ for item in found])

    @self.app.route('/disable_domain')
    def disable_domain():
        """Disable ``domain`` for the proxy identified by ``ip``."""
        ip = request.args.get('ip')
        domain = request.args.get('domain')
        for value, message in ((ip, '请提供IP参数\n'), (domain, '请提供domain参数\n')):
            if value is None:
                return message
        self.mongo_pool.disable_domain(ip, domain)
        return "{} 禁用域名 {} 成功".format(ip, domain)
def __init__(self):
    """Create the Flask app, the MongoPool and the three proxy endpoints."""
    self.app = Flask(__name__)
    # Database access object.
    self.mongo_pool = MongoPool()

    @self.app.route('/random')
    def random():
        """Return one random proxy.

        ``protocol`` and ``domain`` query parameters are passed on as
        filters. The answer is ``protocol://ip:port`` when a protocol was
        supplied, otherwise ``ip:port``.
        """
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        chosen = self.mongo_pool.random_proxy(protocol, domain, count=PROXIES_MAX_COUNT)
        host_port = '{}:{}'.format(chosen.ip, chosen.port)
        if not protocol:
            return host_port
        return '{}://{}'.format(protocol, host_port)

    @self.app.route('/proxies')
    def proxies():
        """Return the matching proxies as a JSON array of attribute dicts."""
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        found = self.mongo_pool.get_proxies(protocol, domain, count=PROXIES_MAX_COUNT)
        # The objects themselves are not JSON serialisable; use their dicts.
        return json.dumps([item.__dict__ for item in found])

    @self.app.route('/disable_domain')
    def disable_domain():
        """Disable ``domain`` for the proxy identified by ``ip``."""
        ip = request.args.get('ip')
        domain = request.args.get('domain')
        for value, message in ((ip, "请提供ip参数"), (domain, "请提供domain参数")):
            if value is None:
                return message
        self.mongo_pool.disable_domain(ip, domain)
        return '{} 禁用域名 {} 成功'.format(ip, domain)
def __init__(self):
    """Create the Flask app and register ``/random`` and ``/proxies``."""
    self.app = Flask(__name__)
    self.mongo_pool = MongoPool()

    @self.app.route('/random')
    def random_proxy():
        """Return the attribute dict of one uniformly chosen stored proxy.

        The previous implementation indexed the result of ``find_all()``
        with ``random.randint(1, 100)``. That never selected the first
        element and raised ``IndexError`` whenever fewer than 101 proxies
        were stored. The result is now materialised and sampled with
        ``random.choice``; an empty pool yields a 404 instead of a 500.
        """
        # list() also covers the case where find_all() returns a lazy
        # iterable rather than a sequence.
        proxies = list(self.mongo_pool.find_all())
        if not proxies:
            return {'error': 'no proxies available'}, 404
        return random.choice(proxies).__dict__

    @self.app.route('/proxies')
    def proxies_list():
        """Return every stored proxy as a JSON array of attribute dicts."""
        proxies = self.mongo_pool.find_all()
        return json.dumps([proxy.__dict__ for proxy in proxies])
def __init__(self):
    """Create the Flask app, the MongoPool and the three proxy endpoints."""
    self.app = Flask(__name__)
    self.mongo_pool = MongoPool()

    @self.app.route('/random')
    def random():
        """Return one random proxy as ``[protocol://]ip:port``."""
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        chosen = self.mongo_pool.random_proxy(protocol, domain, count=PROXIES_MAX_COUNT)
        host_port = '{}:{}'.format(chosen.ip, chosen.port)
        return '{}://{}'.format(protocol, host_port) if protocol else host_port

    @self.app.route('/proxies')
    def proxies():
        """Return the matching proxies as a JSON array of attribute dicts."""
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        found = self.mongo_pool.get_proxies(protocol, domain, count=PROXIES_MAX_COUNT)
        # Convert the proxy objects to dicts before serialising.
        return json.dumps([item.__dict__ for item in found])

    @self.app.route('/disable_domain')
    def disable_domain():
        """Disable ``domain`` for the proxy identified by ``ip``."""
        ip = request.args.get('ip')
        domain = request.args.get('domain')
        for value, message in ((ip, '请求提供Ip参数'), (domain, '请提供域名domain参数')):
            if value is None:
                return message
        self.mongo_pool.disable_domain(ip, domain)
        return "{} 禁用域名 {} 成功".format(ip, domain)
# NOTE(review): the HTML literal assigned in index() below is cut off in the middle of an href attribute (the text after `http://` appears to have been redacted), so the triple-quoted string is never closed and index() has no visible return. Restore the original markup before reformatting or changing this constructor.
def __init__(self): # 2. 实现初始化方法 # 2.1 初始一个Flask的Web服务 self.app = Flask(__name__) self.mongo_pool = MongoPool() @self.app.route('/') def index(): html = ''' <h2 align="center">Welcome to my proxies!</h2> <div align="center"><a href="http://*****:*****@self.app.route('/random') def random(): protocol = request.args.get('protocol') domain = request.args.get('domain') proxy = self.mongo_pool.random_proxy(protocol, domain, count=PROXIES_MAX_COUNT) if protocol: return '{}://{}:{}'.format(protocol, proxy.ip, proxy.port) else: return '{}:{}'.format(proxy.ip, proxy.port) @self.app.route('/proxies') def proxies(): protocol = request.args.get('protocol') domain = request.args.get('domain') proxies = self.mongo_pool.get_proxies(protocol, domain, count=PROXIES_MAX_COUNT) proxies = [proxy.__dict__ for proxy in proxies] return json.dumps(proxies, ensure_ascii=False)
def __init__(self):
    """Initialise the Flask service and register the proxy endpoints."""
    self.app = Flask(__name__)
    self.mongo_pool = MongoPool()

    @self.app.route('/random')
    def random():
        """Return one random proxy as ``[protocol://]ip:port``.

        Previously the protocol was always interpolated, so a request
        without ``?protocol=`` produced ``None://ip:port``. The scheme
        prefix is now added only when a protocol was supplied.
        """
        protocol = request.args.get("protocol")
        domain = request.args.get("domain")
        # ``protocl`` is the keyword spelling these call sites already use.
        proxy = self.mongo_pool.get_random_proxy(protocl=protocol, domain=domain)
        if not proxy:
            # NOTE(review): placeholder body kept unchanged for compatibility;
            # consider replacing it with a proper "no proxy" response.
            return "test"
        address = "{}:{}".format(proxy.ip, proxy.port)
        return "{}://{}".format(protocol, address) if protocol else address

    @self.app.route('/proxies')
    def proxies():
        """Return the matching proxies as a JSON array of attribute dicts."""
        protocol = request.args.get("protocol")
        domain = request.args.get("domain")
        proxies = self.mongo_pool.get_proxies(protocl=protocol, domain=domain, count=PROXIES_MAX_COUNT)
        return json.dumps([proxy.__dict__ for proxy in proxies])

    @self.app.route('/disable_domain')
    def disable_domain():
        """Disable ``domain`` for the proxy identified by ``ip``."""
        ip = request.args.get('ip')
        domain = request.args.get("domain")
        if ip is None:
            return "ip不能为空"
        if domain is None:
            return "domain不能为空"
        self.mongo_pool.disable_domain(ip, domain)
        return "{} 禁用{} 成功".format(ip, domain)
def __init__(self):
    """Initialise the Flask web service and register ``/random`` and ``/proxies``."""
    self.app = Flask(__name__)
    self.mongo_pool = MongoPool()

    @self.app.route('/random')
    def random():
        """Return one random proxy as ``[protocol://]ip:port``."""
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        chosen = self.mongo_pool.random_proxy(protocol, domain, count=PROXIES_MAX_COUNT)
        host_port = '{}:{}'.format(chosen.ip, chosen.port)
        if not protocol:
            return host_port
        return '{}://{}'.format(protocol, host_port)

    @self.app.route('/proxies')
    def proxies():
        """Return the matching proxies as a JSON array of attribute dicts."""
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        found = self.mongo_pool.get_proxies(protocol, domain, count=PROXIES_MAX_COUNT)
        return json.dumps([item.__dict__ for item in found])
def __init__(self):
    """Create the MongoPool object, the queue and the coroutine pool."""
    # Construction order is unchanged: MongoPool, then Queue, then Pool.
    self.mongo_pool, self.queue, self.coroutine_pool = MongoPool(), Queue(), Pool()
def __init__(self):
    """Prepare the objects this instance works with.

    Attributes created:
        mongo_pool: MongoPool object used to operate on the database.
        queue: a new Queue.
        coroutine_pool: a new coroutine Pool.
    """
    self.mongo_pool = MongoPool()
    self.queue = Queue()
    self.coroutine_pool = Pool()
def __init__(self):
    """Create the MongoPool proxy-pool object and the coroutine pool."""
    # Construction order is unchanged: MongoPool first, then Pool.
    self.mongo_pool, self.coroutine_pool = MongoPool(), Pool()
def __init__(self):
    """Set up the two collaborators of this instance.

    Attributes created:
        mongo_pool: a new MongoPool object.
        coroutine_pool: a new coroutine Pool.
    """
    self.mongo_pool = MongoPool()
    self.coroutine_pool = Pool()
def __init__(self):
    """Create the database helper, the queue and the coroutine pool."""
    # MongoPool object used to operate on the database; the queue and the
    # coroutine pool are created right after it, in the original order.
    self.mongo_pool, self.queue, self.coroutine_pool = MongoPool(), Queue(), Pool()
def __init__(self):
    """Initialise the Flask web service and register the proxy endpoints."""
    # Flask application object.
    self.app = Flask(__name__)
    # MongoPool instance used to operate on the database.
    self.mongo_pool = MongoPool()

    @self.app.route('/random')
    def random():
        """Return one random proxy.

        ``protocol`` (protocol type of the request) and ``domain`` (domain
        of the request) are read from the query string and used as filters.
        """
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        chosen = self.mongo_pool.random_proxy(protocol, domain, count=PROXIES_MAX_COUNT)
        host_port = '{}:{}'.format(chosen.ip, chosen.port)
        return '{}://{}'.format(protocol, host_port) if protocol else host_port

    @self.app.route('/proxies')
    def proxies():
        """Return several proxies, filtered by ``protocol`` and ``domain``, as JSON."""
        protocol = request.args.get('protocol')  # http / https
        domain = request.args.get('domain')      # e.g. jd.com
        found = self.mongo_pool.get_proxies(protocol, domain, count=PROXIES_MAX_COUNT)
        # Proxy objects are not JSON serialisable; use their attribute dicts.
        return json.dumps([item.__dict__ for item in found])

    @self.app.route('/disable_domain')
    def disable_domain():
        """Record that the proxy ``ip`` should no longer be served for ``domain``."""
        ip = request.args.get('ip')
        domain = request.args.get('domain')
        for value, message in ((ip, '请提供ip参数'), (domain, '请提供域名domain参数')):
            if value is None:
                return message
        self.mongo_pool.disable_domain(ip, domain)
        return "{} 禁用域名 {} 成功".format(ip, domain)
def __init__(self):
    """Create the MongoPool object and the coroutine pool.

    Attributes created:
        mongo_pool: a new MongoPool object (data-store access).
        coroutine_pool: a new coroutine Pool.
    """
    self.mongo_pool = MongoPool()
    self.coroutine_pool = Pool()
def __init__(self):
    """Create the MongoPool object, the queue and the coroutine pool.

    Attributes created:
        mongo_pool: a new MongoPool object.
        queue: a new Queue.
        coroutine_pool: a new coroutine Pool.
    """
    self.mongo_pool = MongoPool()
    self.queue = Queue()
    self.coroutine_pool = Pool()
def __init__(self):
    """Create the ``mongo_pool`` and ``coroutine_pool`` attributes."""
    # Construction order is unchanged: MongoPool first, then Pool.
    self.mongo_pool, self.coroutine_pool = MongoPool(), Pool()
def __init__(self):
    """Initialise the Flask web service and register the proxy endpoints."""
    # Flask application object.
    self.app = Flask(__name__)
    # MongoDB access object.
    self.mongo_pool = MongoPool()

    @self.app.route('/random')
    def random():
        """Return one random proxy as ``[protocol://]ip:port``.

        Query parameters ``protocol``, ``domain`` and ``nick_type`` are
        forwarded as filters. Any exception from the lookup is logged and
        reported with a generic message. That includes the ``TypeError``
        raised by ``int(None)`` when ``nick_type`` is missing.
        """
        protocol = request.args.get('protocol')
        domain = request.args.get('domain')
        nick_type = request.args.get('nick_type')
        try:
            proxy = self.mongo_pool.random_proxy(protocol=protocol, domain=domain, nick_type=int(nick_type), count=PROXIES_MAX_COUNT)
        except Exception as e:
            logger.exception(e)
            return '数据库无法获取对应的Ip,请更改参数后重试'
        if protocol:
            return '{}://{}:{}'.format(protocol, proxy.ip, proxy.port)
        return '{}:{}'.format(proxy.ip, proxy.port)

    @self.app.route('/proxies')
    def proxies():
        """Return several proxies as a JSON array of attribute dicts.

        Accepts the same ``protocol``, ``domain`` and ``nick_type`` query
        parameters as ``/random``, with the same error handling.
        """
        protocol = request.args.get('protocol')  # http / https
        domain = request.args.get('domain')      # e.g. jd.com
        nick_type = request.args.get('nick_type')
        try:
            proxies = self.mongo_pool.get_proxies(protocol=protocol, domain=domain, nick_type=int(nick_type), count=PROXIES_MAX_COUNT)
        except Exception as e:
            logger.exception(e)
            return '数据库无法获取对应的Ip,请更改参数后重试'
        return json.dumps([proxy.__dict__ for proxy in proxies])

    @self.app.route('/disable_domain')
    def disable_domain():
        """Record that the proxy ``ip`` should no longer be served for ``domain``.

        The previous guard, ``if 1 or ip and domain is None``, was always
        true, so every request was rejected and nothing was ever disabled.
        The request is now rejected only when a parameter is missing.
        """
        ip = request.args.get('ip')
        domain = request.args.get('domain')
        if ip is None or domain is None:
            return '参数IP 与 domain缺一不可'
        try:
            self.mongo_pool.disable_domain(ip, domain)
        except Exception as e:
            logger.exception(e)
            return 'IP不存在或格式有误,请核实后再试'
        return 'IP:{} 禁用域名 {} 成功'.format(ip, domain)
area = self.get_first_ele(tr.xpath(self.detail_xpath['area'])) proxy = Proxy(ip, port, area=area) yield proxy def get_first_ele(self, lis): return lis[0] if len(lis) != 0 else '' def get_proxies(self): for url in self.urls: page = self.get_page_from_url(url) proxies = self.get_proxies_from_page(page) yield from proxies if __name__ == '__main__': config = { 'urls': ['https://www.xicidaili.com/nt/{}'.format(i) for i in range(1, 4)], 'group_xpath': '//*[@id="ip_list"]/tr', # 查看网页源代码里面是否真的有tbody, 'detail_xpath': { 'ip': './td[2]/text()', 'port': './td[3]/text()', 'area': './td[4]/a/text()', } } c_spider = BaseSpider(**config) for line, _ in enumerate(c_spider.get_proxies()): mongo = MongoPool() mongo.insert_one(_) print(line, _)
def __init__(self):
    """Create the database helper, the queue and the coroutine pool.

    Attributes created:
        mongo_pool: MongoPool object used to operate on the database.
        queue: a new Queue.
        coroutine_pool: a new Pool.
    """
    self.mongo_pool = MongoPool()
    self.queue = Queue()
    self.coroutine_pool = Pool()