Example #1
0
def crawlProxy(kuai_type):
    """Crawl proxy entries from kuaidaili (快代理) listing pages.

    Iterates listing pages 1-99 built from the module-level ``kuai_index``
    URL template, parses the proxy table on each page, and passes every
    (ip, port, proxy_type) row to ``judgeProxy`` for validation.

    :param kuai_type: site-section identifier substituted into ``kuai_index``.
    """
    for page in range(1, 100):
        url = kuai_index.format(kuai_type, str(page))
        try:
            # timeout prevents one dead page from hanging the whole crawl
            resp = requests.get(url, timeout=10)
        except requests.RequestException as exc:
            logging.warning('Request failed for %s: %s', url, exc)
            continue
        if resp.status_code != 200:
            continue
        logging.info('Start Crawl kuai: %s', url)
        bs = BeautifulSoup(resp.text, 'html.parser')
        for t_body in bs.find_all('tbody'):
            for tr in t_body.find_all('tr'):
                td_list = tr.find_all('td')
                # skip malformed rows lacking the expected cells (need index 3)
                if len(td_list) < 4:
                    continue
                ip, port, proxy_type = td_list[0].text, td_list[1].text, td_list[3].text
                judgeProxy(ip, port, proxy_type.lower())
Example #2
0
def crawlProxy(xc_type):
    """Crawl proxy entries from xicidaili (西刺代理) listing pages.

    Iterates listing pages 1-99 built from the module-level ``xc_index``
    URL template, parses rows that carry a ``country`` cell (the actual
    proxy rows, as opposed to header rows), and passes every
    (ip, port, proxy_type) triple to ``judgeProxy`` for validation.

    :param xc_type: site-section identifier substituted into ``xc_index``.
    """
    for page in range(1, 100):
        url = xc_index.format(xc_type, str(page))
        try:
            # timeout prevents one dead page from hanging the whole crawl
            resp = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException as exc:
            logging.warning('Request failed for %s: %s', url, exc)
            continue
        if resp.status_code != 200:
            continue
        logging.info('Start Crawl xici: %s', url)
        bs = BeautifulSoup(resp.text, 'html.parser')
        for tr in bs.find_all('tr'):
            # data rows are distinguished by the presence of a country cell
            if not tr.find('td', class_='country'):
                continue
            td_list = tr.find_all('td')
            # skip malformed rows lacking the expected cells (need index 5)
            if len(td_list) < 6:
                continue
            ip, port, proxy_type = td_list[1].text, td_list[2].text, td_list[5].text
            judgeProxy(ip, port, proxy_type.lower())