import logging

import requests
from bs4 import BeautifulSoup


def crawlKuaiProxy(kuai_type):
    """Crawl free proxy lists from kuaidaili."""
    for i in range(1, 100):
        url = kuai_index.format(kuai_type, str(i))
        resp = requests.get(url)
        if resp.status_code == 200:
            logging.info('Start Crawl kuai: %s', url)
            bs = BeautifulSoup(resp.text, 'html.parser')
            for t_body in bs.find_all('tbody'):
                for tr in t_body.find_all('tr'):
                    td_list = tr.find_all('td')
                    # Column layout: 0 = IP, 1 = port, 3 = protocol (HTTP/HTTPS)
                    ip, port, proxy_type = td_list[0].text, td_list[1].text, td_list[3].text
                    judgeProxy(ip, port, proxy_type.lower())
def crawlXiciProxy(xc_type):
    """Crawl free proxy lists from xicidaili."""
    for i in range(1, 100):
        url = xc_index.format(xc_type, str(i))
        # xici rejects bare requests, so a browser-like User-Agent is sent via headers
        resp = requests.get(url, headers=headers)
        if resp.status_code == 200:
            logging.info('Start Crawl xici: %s', url)
            bs = BeautifulSoup(resp.text, 'html.parser')
            for tr in bs.find_all('tr'):
                # Data rows carry a 'country' cell; header rows do not
                if tr.find('td', class_='country'):
                    td_list = tr.find_all('td')
                    # Column layout: 1 = IP, 2 = port, 5 = protocol (HTTP/HTTPS)
                    ip, port, proxy_type = td_list[1].text, td_list[2].text, td_list[5].text
                    judgeProxy(ip, port, proxy_type.lower())
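# The crawlers above reference a few module-level names that are not shown in
# this section. The sketch below is a minimal, assumed set of definitions: the
# URL templates, the headers dict, and the judgeProxy() validation logic are
# illustrative guesses, not the project's actual code.

# Page templates: first slot is the list type, second is the page number (assumed).
kuai_index = 'https://www.kuaidaili.com/free/{}/{}/'
xc_index = 'https://www.xicidaili.com/{}/{}'

# Browser-like User-Agent so xici does not reject the request (assumed value).
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}


def judgeProxy(ip, port, proxy_type):
    """Check a candidate proxy by fetching a probe URL through it (assumed logic)."""
    proxy = '{}://{}:{}'.format(proxy_type, ip, port)
    try:
        # Any stable endpoint works as a probe; httpbin is used here for illustration.
        resp = requests.get('http://httpbin.org/ip',
                            proxies={proxy_type: proxy}, timeout=5)
        if resp.status_code == 200:
            logging.info('Usable proxy: %s', proxy)
            # A real implementation would persist the working proxy here.
    except requests.RequestException:
        logging.debug('Proxy failed: %s', proxy)


# Typical invocation, assuming kuaidaili's 'inha' (anonymous HTTP) list and
# xici's 'nn' (high-anonymity) list as the type arguments:
# crawlKuaiProxy('inha')
# crawlXiciProxy('nn')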