def filte(content):
    """Parse an HTML proxy-list page into a list of Proxy objects.

    Scans every <tr> row; the td columns are positional:
    2 = ip, 3 = port, 4 = location (inside an <a> tag, may be absent),
    5 = anonymity level, 6 = protocol (lower-cased).
    Rows that never reach column 2 (no ip) are dropped.

    :param content: raw HTML of the listing page
    :return: list of populated Proxy instances
    """
    soup = BeautifulSoup(content)
    proxy_list = []
    for row in soup.findAll('tr'):  # renamed from 'proxy' — it shadowed the result object
        proxy = Proxy()
        has_get = False  # set once the ip column has been seen
        # enumerate(start=1) replaces the hand-maintained td_index counter
        for td_index, proxy_td in enumerate(row.findAll('td'), 1):
            if td_index == 2:
                has_get = True
                proxy.ip = proxy_td.text
            elif td_index == 3:
                proxy.port = proxy_td.text
            elif td_index == 4:
                # location lives inside an <a>; some rows omit it
                if proxy_td.a is not None:
                    proxy.location = proxy_td.a.text
            elif td_index == 5:
                proxy.anonymous_type = proxy_td.text
            elif td_index == 6:
                proxy.proxy_type = proxy_td.text.lower()
        if has_get:
            proxy_list.append(proxy)
    return proxy_list
def filte(content):
    """Extract Proxy records from an HTML page of <tr> rows.

    Column layout per row: td 2 = ip, td 3 = port, td 4 = location
    (wrapped in an <a>, optional), td 5 = anonymity type,
    td 6 = protocol (lower-cased).  A row is kept only if its ip
    column (td 2) was present.

    :param content: raw HTML text
    :return: list of Proxy objects
    """
    soup = BeautifulSoup(content)
    results = []
    for row in soup.findAll('tr'):  # 'row' avoids shadowing the Proxy being built
        entry = Proxy()
        saw_ip = False  # flips True when the ip column (td 2) is reached
        # enumerate() supersedes the manual td_index += 1 bookkeeping
        for col, cell in enumerate(row.findAll('td'), 1):
            if col == 2:
                saw_ip = True
                entry.ip = cell.text
            elif col == 3:
                entry.port = cell.text
            elif col == 4:
                # the location anchor may be missing on some rows
                if cell.a is not None:
                    entry.location = cell.a.text
            elif col == 5:
                entry.anonymous_type = cell.text
            elif col == 6:
                entry.proxy_type = cell.text.lower()
        if saw_ip:
            results.append(entry)
    return results
def filte(content):
    """Parse the third <table> of the page into Proxy objects.

    Column layout per row: td 2 = ip (validated with a dotted-quad
    regex), td 3 = port (int), td 4 = anonymity (Chinese labels mapped
    to '高匿' / '透明'), td 5 = location; proxy_type is always 'http'.
    Rows whose td 2 contains no ip are discarded.

    :param content: raw HTML text
    :return: list of Proxy objects
    """
    soup = BeautifulSoup(content)
    pattern = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
    proxy_list = []
    for table_index, table in enumerate(soup.findAll('table'), 1):
        if table_index != 3:  # only the 3rd table holds the proxy rows
            continue
        for row in table.findAll('tr'):  # renamed from 'proxy' — it shadowed the result object
            proxy = Proxy()
            is_proxy = False  # True once a valid ip was extracted from td 2
            for td_index, proxy_td in enumerate(row.findAll('td'), 1):
                if td_index == 2:
                    rel_ip_info = re.search(pattern, proxy_td.text)
                    if rel_ip_info:
                        proxy.ip = rel_ip_info.group(0)
                        is_proxy = True
                elif td_index == 3:
                    if is_proxy:
                        proxy.port = int(proxy_td.text)
                elif td_index == 4:
                    if is_proxy:
                        # NOTE(review): comparing str literals against
                        # BeautifulSoup's unicode text can silently fail on
                        # Python 2 unless the file declares utf-8 /
                        # unicode_literals — confirm at file top.
                        if '匿名代理' == proxy_td.text or '高度匿名' == proxy_td.text:
                            proxy.anonymous_type = '高匿'
                        else:
                            proxy.anonymous_type = '透明'
                elif td_index == 5:
                    if is_proxy:
                        proxy.location = proxy_td.text
                        proxy.proxy_type = 'http'
            if is_proxy:
                proxy_list.append(proxy)
        break  # tables after the 3rd were never used — stop scanning
    return proxy_list
def filte(content):
    """Collect Proxy entries from the 3rd <table> of an HTML page.

    Per row: td 2 = ip (must match a dotted-quad regex), td 3 = port
    (converted to int), td 4 = anonymity ('匿名代理'/'高度匿名' map to
    '高匿', anything else to '透明'), td 5 = location.  proxy_type is
    hard-wired to 'http'.  Rows without a regex-matching ip are skipped.

    :param content: raw HTML text
    :return: list of Proxy objects
    """
    soup = BeautifulSoup(content)
    ip_re = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
    found = []
    for table_no, table in enumerate(soup.findAll('table'), 1):
        if table_no != 3:  # the proxy listing is always the 3rd table
            continue
        for row in table.findAll('tr'):  # 'row' avoids shadowing the Proxy
            entry = Proxy()
            valid = False  # True after td 2 yields a well-formed ip
            for col, cell in enumerate(row.findAll('td'), 1):
                if col == 2:
                    match = re.search(ip_re, cell.text)
                    if match:
                        entry.ip = match.group(0)
                        valid = True
                elif col == 3:
                    if valid:
                        entry.port = int(cell.text)
                elif col == 4:
                    if valid:
                        # NOTE(review): str-literal vs unicode comparison —
                        # on Python 2 this needs a utf-8 coding declaration
                        # or unicode_literals to work; verify at file top.
                        if '匿名代理' == cell.text or '高度匿名' == cell.text:
                            entry.anonymous_type = '高匿'
                        else:
                            entry.anonymous_type = '透明'
                elif col == 5:
                    if valid:
                        entry.location = cell.text
                        entry.proxy_type = 'http'
            if valid:
                found.append(entry)
        break  # nothing useful past the 3rd table — stop early
    return found
return (False, 0) except: return (False, 0) def check_google(proxy_info): proxy_content = proxy_info.ip + ":" + str(proxy_info.port) proxy = urllib2.ProxyHandler({proxy_info.proxy_type: proxy_content}) opener = urllib2.build_opener(proxy) urllib2.install_opener(opener) try: time1 = time.time() response = urllib2.urlopen(GOOGLE_CHECK_URL, timeout=3) title = BeautifulSoup(response.read()).title.text if "Google" == str(title): proxy_info.check_time = str(datetime.now()).split(".")[0] return (True, (time.time() - time1) * 1000) else: return (False, 0) except: return (False, 0) if __name__ == "__main__": proxy = Proxy() proxy.ip = "222.74.6.48" proxy.port = "8000" proxy.proxy_type = "http" default_ip = get_default_ip() print check_anonymous(proxy, default_ip)
return (True, (time.time() - time1) * 1000) return (False, 0) except: return (False, 0) def check_google(proxy_info): proxy_content = proxy_info.ip + ':' + str(proxy_info.port) proxy = urllib2.ProxyHandler({proxy_info.proxy_type : proxy_content}) opener = urllib2.build_opener(proxy) urllib2.install_opener(opener) try: time1 = time.time() response = urllib2.urlopen(GOOGLE_CHECK_URL, timeout=3) title = BeautifulSoup(response.read()).title.text if 'Google' == str(title): proxy_info.check_time = str(datetime.now()).split('.')[0] return (True, (time.time() - time1) * 1000) else: return (False, 0) except: return (False, 0) if __name__ == '__main__': proxy = Proxy() proxy.ip = '222.74.6.48' proxy.port = '8000' proxy.proxy_type = 'http' default_ip = get_default_ip() print check_anonymous(proxy, default_ip)
return (False, 0) except: return (False, 0) def check_google(proxy_info): proxy_content = proxy_info.ip + ':' + str(proxy_info.port) proxy = urllib2.ProxyHandler({proxy_info.proxy_type : proxy_content}) opener = urllib2.build_opener(proxy) urllib2.install_opener(opener) try: time1 = time.time() response = urllib2.urlopen(GOOGLE_CHECK_URL, timeout=3) title = BeautifulSoup(response.read()).title.text if 'Google' == str(title): proxy_info.check_time = str(datetime.now()).split('.')[0] return (True, (time.time() - time1) * 1000) else: return (False, 0) except: return (False, 0) if __name__ == '__main__': proxy = Proxy() proxy.ip = '222.74.6.48' proxy.port = '8000' proxy.proxy_type = 'http' default_ip = get_default_ip() print check_anonymous(proxy, default_ip)