Example n. 1
0
    async def yield_proxy(self, *args, **kwargs):
        """Scrape proxies from www.nyloner.cn's token-protected JSON API.

        Expects ``ev_loop`` (an asyncio event loop) in *kwargs*; it is handed
        to the WebSpider. Returns a list of ``models.ProxyTbl`` (possibly
        empty). Network or parse failures are logged, never raised.
        """
        # https://www.nyloner.cn/proxy
        ev_loop = kwargs.get('ev_loop')

        async with webutils.WebSpider(ev_loop) as spider:
            spider.header.update({'Host': 'www.nyloner.cn', 'Referer': 'https://www.nyloner.cn/proxy'})
            proxies = []
            num = 15  # items requested per page
            # Warm-up request: visit the page once before hitting the paged
            # API; bail out early if the site is unreachable.
            status, _ = await spider.get('https://www.nyloner.cn/proxy')
            if status != 200:
                mylog.error('%s 访问出错', __name__)
                return proxies
            for page in range(1, 50):
                # The API wants the current epoch second plus a token derived
                # from (page, num, t) — see self.gen_token.
                t = int(datetime.datetime.now().timestamp())
                status, resp_html = await spider.get('https://www.nyloner.cn/proxy', params={
                    'page': page, 'num': num, 't': t, 'token': self.gen_token(page, num, t)})
                if status != 200:
                    continue
                try:
                    # BUG FIX: json.loads() no longer accepts an ``encoding``
                    # argument (deprecated since 3.1, removed in Python 3.9);
                    # resp_html is already a decoded str here.
                    js_result = json.loads(resp_html)
                    if js_result['status'].lower() == 'true':
                        # 'list' is an obfuscated payload; decode_str turns it
                        # back into a JSON array of {'ip': ..., 'port': ...}.
                        for pd in json.loads(self.decode_str(js_result['list'])):
                            proxies.append(models.ProxyTbl(host=pd['ip'], port=int(pd['port']),
                                                           scheme='http', country='未知'))
                except json.JSONDecodeError as er:
                    # Malformed body usually means the token scheme changed;
                    # keep whatever we collected so far.
                    mylog.warning('%s 解析返回值<%s>出错: %s', __name__, resp_html, er)
                    return proxies

        return proxies
Example n. 2
0
    async def yield_proxy(self, *args, **kwargs):
        """Collect proxies from the plain-text list at www.kewangst.com.

        Expects ``ev_loop`` (an asyncio event loop) in *kwargs*. Returns a
        list of ``models.ProxyTbl`` (possibly empty); a failed request is
        logged and yields the empty list.
        """
        ev_loop = kwargs.get('ev_loop')

        async with webutils.WebSpider(ev_loop) as spider:
            spider.header.update({'Host': 'www.kewangst.com', 'Referer': 'https://www.kewangst.com/ProxyList'})
            proxies = []
            status, resp_text = await spider.get('https://www.kewangst.com/ProxyList')
            if status != 200:
                mylog.error('%s 访问出错', __name__)
                return proxies
            # The response body is one proxy URL per line; keep only lines
            # that look like full URLs (scheme://host:port).
            with io.StringIO(resp_text) as fp:
                for raw_line in fp:
                    candidate = raw_line.strip()
                    if not candidate.startswith('http'):
                        continue
                    try:
                        parsed = urllib.parse.urlparse(candidate)
                        # ``.port`` raises ValueError for a malformed port.
                        proxies.append(models.ProxyTbl(
                            host=parsed.hostname, port=parsed.port,
                            scheme=parsed.scheme, country='未知'))
                    except ValueError as e:
                        mylog.warning(e)

        return proxies
Example n. 3
0
    async def yield_proxy(self, *args, **kwargs):
        """Scrape proxies from www.66ip.cn area-index pages.

        Expects ``ev_loop`` (an asyncio event loop) in *kwargs*. Walks 33
        area indexes, 1 page each, and returns a list of ``models.ProxyTbl``
        (possibly empty). Failed pages are skipped silently.
        """
        ev_loop = kwargs.get('ev_loop')
        async with webutils.WebSpider(ev_loop) as spider:
            spider.header.update({'Host': 'www.66ip.cn'})

            area = 33   # number of area indexes to crawl
            page = 1    # pages per area
            proxies = []
            for area_index in range(1, area + 1):
                # BUG FIX: the sleep coroutine was created but never awaited,
                # so no throttling actually happened; the ``loop`` argument
                # was also removed from asyncio.sleep() in Python 3.10.
                await asyncio.sleep(1)
                for i in range(1, page + 1):
                    url = "http://www.66ip.cn/areaindex_{}/{}.html".format(
                        area_index, i)
                    status, resp_html = await spider.get(url)
                    if status != 200:
                        continue
                    html_tree = etree.HTML(resp_html)
                    # Skip the header row of the listing table.
                    tr_list = html_tree.xpath(
                        "//*[@id='footer']/div/table/tr[position()>1]")
                    if len(tr_list) == 0:
                        continue
                    # Columns: td[1]=ip, td[2]=port, td[3]=area.
                    for tr in tr_list:
                        proxies.append(
                            models.ProxyTbl(host=tr.xpath("./td[1]/text()")[0],
                                            port=int(
                                                tr.xpath("./td[2]/text()")[0]),
                                            country='中国',
                                            area=tr.xpath("./td[3]/text()")[0],
                                            scheme='http'))
            return proxies
Example n. 4
0
 async def _ip_check_taobao(self,
                            pp: models.ProxyTbl) -> (bool, models.ProxyTbl):
     """Verify a proxy by requesting ip.taobao.com's geo API through it.

     Returns ``(ok, proxy)``: on success a new ``ProxyTbl`` enriched with
     country/area from the lookup, otherwise the original *pp* unchanged.
     """
     # BUG FIX: the original if/else assigned the exact same URL in both
     # branches, so the scheme check was dead code.
     # NOTE(review): if an https endpoint was intended for https proxies,
     # restore the branch with the correct URL — confirm with the API owner.
     url = 'http://ip.taobao.com/service/getIpInfo2.php?ip=myip'
     async with self._sess.get(
             url,
             proxy='{0}://{1}:{2}'.format(
                 pp.scheme if pp.scheme is not None else 'http', pp.host,
                 pp.port)) as resp:
         if resp.status != 200:
             return False, pp
         try:
             # content_type=None: accept any Content-Type header.
             json_pp = await resp.json(encoding='utf-8', content_type=None)
         except json.JSONDecodeError:
             return False, pp
         # Reject unusable replies: empty body, API error code, or the
         # proxy did not actually route us (reported ip != proxy host).
         if (json_pp is None) \
                 or (json_pp['code'] != 0) \
                 or (json_pp['data']['ip'] != pp.host):
             return False, pp
         new_pp = models.ProxyTbl(
             host=pp.host,
             port=pp.port,
             scheme=pp.scheme if pp.scheme is not None else 'http',
             country=json_pp['data']['country'],
             area='%s.%s' %
             (json_pp['data']['region'], json_pp['data']['city']))
         return True, new_pp
Example n. 5
0
    async def yield_proxy(self, *args, **kwargs):
        """Scrape proxies from the four www.xicidaili.com listings.

        Expects ``ev_loop`` (an asyncio event loop) in *kwargs*. Crawls 2
        pages of each listing and returns a list of ``models.ProxyTbl``
        (possibly empty). Failed pages are skipped silently.
        """
        ev_loop = kwargs.get('ev_loop')
        async with webutils.WebSpider(ev_loop) as spider:
            spider.header.update({'Host': 'www.xicidaili.com'})

            proxies = []

            url_list = [
                'http://www.xicidaili.com/nn/',  # high anonymity
                'http://www.xicidaili.com/nt/',  # transparent
                'http://www.xicidaili.com/wn/',  # domestic https
                'http://www.xicidaili.com/wt/',  # domestic plain http
            ]
            page = 2  # pages crawled per listing
            for base_url in url_list:
                for i in range(1, page + 1):
                    # BUG FIX: the sleep coroutine was never awaited (no
                    # throttling happened) and asyncio.sleep() lost its
                    # ``loop`` argument in Python 3.10.
                    await asyncio.sleep(1)
                    # BUG FIX: the original appended the page number to the
                    # loop variable itself, so the second iteration requested
                    # ".../nn/12" instead of ".../nn/2".
                    page_url = base_url + str(i)
                    status, resp_html = await spider.get(page_url)
                    if status != 200:
                        continue
                    html_tree = etree.HTML(resp_html)
                    # Skip the table's header row.
                    ip_list = html_tree.xpath(
                        '//table[@id="ip_list"]//tr[position()>1]')
                    for tr in ip_list:
                        tds = tr.xpath("td")
                        # BUG FIX: tds[5] is read below, so at least 6 cells
                        # are required — the old "< 5" guard allowed an
                        # IndexError on 5-cell rows.
                        if len(tds) < 6:
                            continue

                        # Location is either a link (<a>) or bare cell text.
                        location = tds[3].xpath('a')
                        if len(location) >= 1:
                            location = location[0].text
                        else:
                            location = tds[3].text
                        # Columns: td[1]=ip, td[2]=port, td[5]=scheme.
                        proxies.append(
                            models.ProxyTbl(host=str(tds[1].text),
                                            port=int(tds[2].text),
                                            country='中国',
                                            area=str(location),
                                            scheme=str(tds[5].text).lower()))

            return proxies
Example n. 6
0
    async def _ip_check_360(self,
                            pp: models.ProxyTbl) -> (bool, models.ProxyTbl):
        """Verify a proxy via ip.360.cn's geo API, requested through the proxy.

        Returns ``(ok, proxy)``: on success a new ``ProxyTbl`` with
        country/area parsed from the reply, otherwise the original *pp*.
        """
        async with self._sess.get(
                'http://ip.360.cn/IPQuery/ipquery?ip=%s' % pp.host,
                proxy='{0}://{1}:{2}'.format(
                    pp.scheme if pp.scheme is not None else 'http', pp.host,
                    pp.port)) as resp:
            if resp.status != 200:
                return False, pp
            try:
                # content_type=None: accept any Content-Type header.
                json_pp = await resp.json(encoding='utf-8', content_type=None)
            except json.JSONDecodeError:
                return False, pp
            if (json_pp is None) or (json_pp['errno'] != 0):
                return False, pp

            # 'data' is apparently a tab-separated location string —
            # presumably "<country/region>\t<detail>"; format inferred from
            # this parsing, not documented here.
            loc_pair = json_pp['data'].strip().split('\t')
            if len(loc_pair) == 1:
                # Only a country-level name; no finer area available.
                country = loc_pair[0]
                area = 'XX'
            elif len(loc_pair) == 2:
                if any(t in loc_pair[0] for t in ('台湾', '香港', '澳门')):
                    # Split e.g. "台湾XX" into country "台湾" + remainder.
                    country = loc_pair[0][0:2]
                    area = loc_pair[0][2:]
                    if not area:
                        area = 'XX'
                else:
                    country = '中国'
                    area = loc_pair[0]
                    # NOTE(review): loc_pair[1] is never used in this branch —
                    # confirm whether the area should come from loc_pair[1].
            else:
                mylog.warning('%s 无法解析', json_pp)
                return False, pp

            new_pp = models.ProxyTbl(
                host=pp.host,
                port=pp.port,
                scheme=pp.scheme if pp.scheme is not None else 'http',
                country=country,
                area=area)
            return True, new_pp