async def yield_proxy(self, *args, **kwargs):
    """Scrape proxies from https://www.nyloner.cn/proxy.

    Pages through the site's JSON API (pages 1..49), decoding each
    obfuscated payload with ``self.decode_str``.

    :param kwargs: expects ``ev_loop`` — the asyncio event loop passed
                   to the spider.
    :return: list of ``models.ProxyTbl`` (possibly empty on failure).
    """
    ev_loop = kwargs.get('ev_loop')
    async with webutils.WebSpider(ev_loop) as spider:
        spider.header.update({
            'Host': 'www.nyloner.cn',
            'Referer': 'https://www.nyloner.cn/proxy'})
        proxies = []
        num = 15  # proxies requested per API page
        # Warm-up request: visit the listing page first (the API call
        # below presumably needs the session/cookies it sets up).
        status, _ = await spider.get('https://www.nyloner.cn/proxy')
        if status != 200:
            mylog.error('%s 访问出错', __name__)
            return proxies
        for page in range(1, 50):
            t = int(datetime.datetime.now().timestamp())
            status, resp_html = await spider.get(
                'https://www.nyloner.cn/proxy',
                params={'page': page,
                        'num': num,
                        't': t,
                        'token': self.gen_token(page, num, t)})
            if status != 200:
                continue
            try:
                # BUG FIX: json.loads() no longer accepts an `encoding`
                # keyword (deprecated in 3.1, removed in 3.9); resp_html
                # is already a str here.
                js_result = json.loads(resp_html)
                if js_result['status'].lower() == 'true':
                    for pd in json.loads(self.decode_str(js_result['list'])):
                        proxies.append(models.ProxyTbl(
                            host=pd['ip'],
                            port=int(pd['port']),
                            scheme='http',
                            country='未知'))
            except json.JSONDecodeError as er:
                mylog.warning('%s 解析返回值<%s>出错: %s',
                              __name__, resp_html, er)
                # Original behavior: stop paging on the first bad payload.
                return proxies
        return proxies
async def yield_proxy(self, *args, **kwargs):
    """Scrape proxies from https://www.kewangst.com/ProxyList.

    The page is plain text with one proxy URL per line; every line that
    starts with ``http`` is parsed into a ``models.ProxyTbl``.

    :param kwargs: expects ``ev_loop`` — the asyncio event loop passed
                   to the spider.
    :return: list of ``models.ProxyTbl`` (possibly empty on failure).
    """
    ev_loop = kwargs.get('ev_loop')
    async with webutils.WebSpider(ev_loop) as spider:
        spider.header.update({
            'Host': 'www.kewangst.com',
            'Referer': 'https://www.kewangst.com/ProxyList'})
        result = []
        status, resp_text = await spider.get('https://www.kewangst.com/ProxyList')
        if status != 200:
            mylog.error('%s 访问出错', __name__)
            return result
        for raw_line in resp_text.splitlines():
            candidate = raw_line.strip()
            if not candidate.startswith('http'):
                continue
            try:
                # .port raises ValueError for a malformed port, so the
                # attribute access stays inside the try block.
                parsed = urllib.parse.urlparse(candidate)
                result.append(models.ProxyTbl(
                    host=parsed.hostname,
                    port=parsed.port,
                    scheme=parsed.scheme,
                    country='未知'))
            except ValueError as e:
                mylog.warning(e)
        return result
async def yield_proxy(self, *args, **kwargs):
    """Scrape proxies from http://www.66ip.cn area-index pages.

    Iterates all area sections, fetching one listing page per area and
    extracting host/port/area from the result table.

    :param kwargs: expects ``ev_loop`` — the asyncio event loop passed
                   to the spider.
    :return: list of ``models.ProxyTbl`` (possibly empty on failure).
    """
    ev_loop = kwargs.get('ev_loop')
    async with webutils.WebSpider(ev_loop) as spider:
        spider.header.update({'Host': 'www.66ip.cn'})
        area = 33   # number of area-index sections on the site
        page = 1    # pages fetched per area
        proxies = []
        for area_index in range(1, area + 1):
            # BUG FIX: the coroutine was created but never awaited, so no
            # delay ever happened (and a RuntimeWarning was emitted).
            # The removed `loop=` kwarg is also gone since Python 3.10;
            # awaiting inside this coroutine uses the running loop anyway.
            await asyncio.sleep(1)
            for i in range(1, page + 1):
                url = "http://www.66ip.cn/areaindex_{}/{}.html".format(
                    area_index, i)
                status, resp_html = await spider.get(url)
                if status != 200:
                    continue
                html_tree = etree.HTML(resp_html)
                # Skip the header row of the listing table.
                tr_list = html_tree.xpath(
                    "//*[@id='footer']/div/table/tr[position()>1]")
                if len(tr_list) == 0:
                    continue
                for tr in tr_list:
                    proxies.append(models.ProxyTbl(
                        host=tr.xpath("./td[1]/text()")[0],
                        port=int(tr.xpath("./td[2]/text()")[0]),
                        country='中国',
                        area=tr.xpath("./td[3]/text()")[0],
                        scheme='http'))
        return proxies
async def _ip_check_taobao(self, pp: models.ProxyTbl) -> (bool, models.ProxyTbl):
    """Validate proxy *pp* against Taobao's IP-lookup service.

    The request is routed through the proxy itself; it counts as alive
    only if the service reports the caller's IP as ``pp.host``.

    :param pp: candidate proxy record.
    :return: ``(True, enriched_proxy)`` with country/area filled in, or
             ``(False, pp)`` on any failure.
    """
    # NOTE(review): the original if/else assigned the *identical* URL in
    # both the https and non-https branches — probably an unfinished
    # intent to use an https endpoint for https proxies. Collapsed to a
    # single assignment; behavior is unchanged.
    url = 'http://ip.taobao.com/service/getIpInfo2.php?ip=myip'
    async with self._sess.get(
            url,
            proxy='{0}://{1}:{2}'.format(
                pp.scheme if pp.scheme is not None else 'http',
                pp.host, pp.port)) as resp:
        if resp.status != 200:
            return False, pp
        try:
            # content_type=None: accept any Content-Type header.
            json_pp = await resp.json(encoding='utf-8', content_type=None)
        except json.JSONDecodeError:
            return False, pp
        # The reported IP must match the proxy host, otherwise the
        # proxy is transparent/broken.
        if (json_pp is None) \
                or (json_pp['code'] != 0) \
                or (json_pp['data']['ip'] != pp.host):
            return False, pp
        new_pp = models.ProxyTbl(
            host=pp.host,
            port=pp.port,
            scheme=pp.scheme if pp.scheme is not None else 'http',
            country=json_pp['data']['country'],
            area='%s.%s' % (json_pp['data']['region'],
                            json_pp['data']['city']))
        return True, new_pp
async def yield_proxy(self, *args, **kwargs):
    """Scrape proxies from http://www.xicidaili.com list pages.

    Fetches two pages from each of the four listing categories and
    parses the ``ip_list`` table rows.

    :param kwargs: expects ``ev_loop`` — the asyncio event loop passed
                   to the spider.
    :return: list of ``models.ProxyTbl`` (possibly empty on failure).
    """
    ev_loop = kwargs.get('ev_loop')
    async with webutils.WebSpider(ev_loop) as spider:
        spider.header.update({'Host': 'www.xicidaili.com'})
        proxies = []
        url_list = [
            'http://www.xicidaili.com/nn/',  # high-anonymity
            'http://www.xicidaili.com/nt/',  # transparent
            'http://www.xicidaili.com/wn/',  # domestic https
            'http://www.xicidaili.com/wt/',  # domestic plain http
        ]
        page = 2  # pages fetched per category
        for base_url in url_list:
            for i in range(1, page + 1):
                # BUG FIX: the sleep coroutine was never awaited, so no
                # delay actually happened (and the removed `loop=` kwarg
                # would raise on Python 3.10+).
                await asyncio.sleep(1)
                # BUG FIX: the original did `url = url + str(i)`,
                # reassigning the loop variable — the second iteration
                # requested ".../nn/12" instead of ".../nn/2".
                page_url = base_url + str(i)
                status, resp_html = await spider.get(page_url)
                if status != 200:
                    continue
                html_tree = etree.HTML(resp_html)
                ip_list = html_tree.xpath(
                    '//table[@id="ip_list"]//tr[position()>1]')
                for tr in ip_list:
                    tds = tr.xpath("td")
                    # BUG FIX: tds[5] is read below, so at least six
                    # cells are required (the old `< 5` guard allowed an
                    # IndexError on five-cell rows).
                    if len(tds) < 6:
                        continue
                    location = tds[3].xpath('a')
                    if len(location) >= 1:
                        location = location[0].text
                    else:
                        location = tds[3].text
                    proxies.append(models.ProxyTbl(
                        host=str(tds[1].text),
                        port=int(tds[2].text),
                        country='中国',
                        area=str(location),
                        scheme=str(tds[5].text).lower()))
        return proxies
async def _ip_check_360(self, pp: models.ProxyTbl) -> (bool, models.ProxyTbl):
    """Validate proxy *pp* against 360's IP-lookup service.

    Routes the lookup through the proxy itself and parses the
    tab-separated location string in the response to fill in country
    and area.

    :param pp: candidate proxy record.
    :return: ``(True, enriched_proxy)`` on success, else ``(False, pp)``.
    """
    proxy_url = '{0}://{1}:{2}'.format(
        pp.scheme if pp.scheme is not None else 'http', pp.host, pp.port)
    async with self._sess.get(
            'http://ip.360.cn/IPQuery/ipquery?ip=%s' % pp.host,
            proxy=proxy_url) as resp:
        if resp.status != 200:
            return False, pp
        try:
            # content_type=None: accept any Content-Type header.
            payload = await resp.json(encoding='utf-8', content_type=None)
        except json.JSONDecodeError:
            return False, pp
        if (payload is None) or (payload['errno'] != 0):
            return False, pp
        # 'data' looks like "<place>\t<extra>" or just "<place>".
        fields = payload['data'].strip().split('\t')
        if len(fields) == 1:
            country, area = fields[0], 'XX'
        elif len(fields) == 2:
            place = fields[0]
            if any(region in place for region in ('台湾', '香港', '澳门')):
                # First two characters name the region, the rest is the area.
                country = place[0:2]
                area = place[2:] or 'XX'
            else:
                country, area = '中国', place
        else:
            mylog.warning('%s 无法解析', payload)
            return False, pp
        checked = models.ProxyTbl(
            host=pp.host,
            port=pp.port,
            scheme=pp.scheme if pp.scheme is not None else 'http',
            country=country,
            area=area)
        return True, checked