import datetime
from time import sleep          # used by the retry loop commented out in search_proxy

import requests                 # likewise only needed once that retry loop is re-enabled
from bs4 import BeautifulSoup
from pytz import timezone, utc


def get_proxy(item):
    """Parse one table row of a proxy listing and persist it as a Proxy."""
    try:
        proxy = Proxy()
        td = item.find_all('td')
        proxy.country = td[0].find('img')['alt']
        proxy.ip = td[1].text
        proxy.port = td[2].text
        proxy.address = td[3].find('a').text
        proxy.status = td[5].text
        proxy.speed = td[6].find('div')['title'][:-1]   # drop the trailing unit character
        proxy.ping = td[7].find('div')['title'][:-1]
        proxy.live_time = td[8].text
        proxy.timestamp = '20' + td[9].text + ':00'     # expand 'YY-MM-DD HH:MM' to a full timestamp
        proxy.insert()
    except TypeError:
        # A missing <img>/<div> in the row makes one of the lookups return None.
        print('GET PROXY NONE TYPE')
def search_proxy(): url = "http://cn-proxy.com/" data = "" # while not data or data == "": # try: # data = requests.get(url).text # except Exception: # sleep(2) # continue with open('/home/chenxiao/document/data', 'rt') as f: data = f.read() soup = BeautifulSoup(data, 'html.parser') tbody = soup.findAll('tbody')[1] tr_list = tbody.findAll('tr') for tr in tr_list: td_list = tr.findAll('td') proxy = Proxy() speed = get_speed(td_list[3]) if speed < 70: continue proxy.speed = speed proxy.url = td_list[0].text + ":" + td_list[1].text proxy.position = td_list[2].text time_string = td_list[4].text time = datetime.datetime.strptime(time_string, '%Y-%m-%d %H:%M:%S') time = timezone('Asia/Shanghai').localize(time) utc_time = time.astimezone(utc) proxy.last_check = utc_time if not Proxy.objects(url=proxy.url): print("加入代理服务器: {}".format(proxy.url)) proxy.save()