Exemple #1
0
def get_proxy(item):
    """Build a ``Proxy`` record from one table row and insert it.

    ``item`` must expose ``find_all('td')``; the cells at positions 0-3 and
    5-9 are read (position 4 is ignored). If an expected nested element
    (``img``, ``a`` or ``div``) is missing, a message is printed and nothing
    is inserted. A row with too few cells still raises ``IndexError``.

    Returns:
        None.
    """
    try:
        td = item.find_all('td')

        # Parse every cell before touching Proxy so that a malformed row
        # never leaves a half-populated record behind.
        country = td[0].find('img')['alt']
        ip = td[1].text
        port = td[2].text
        address = td[3].find('a').text
        status = td[5].text
        # The trailing character of each title is dropped
        # (presumably a unit or percent sign -- TODO confirm).
        speed = td[6].find('div')['title'][:-1]
        ping = td[7].find('div')['title'][:-1]
        live_time = td[8].text
        # NOTE(review): looks like the cell holds a two-digit-year timestamp
        # without seconds; '20' and ':00' pad it out -- confirm.
        timestamp = '20' + td[9].text + ':00'

        proxy = Proxy()
        proxy.country = country
        proxy.ip = ip
        proxy.port = port
        proxy.address = address
        proxy.status = status
        proxy.speed = speed
        proxy.ping = ping
        proxy.live_time = live_time
        proxy.timestamp = timestamp
        proxy.insert()
    except (TypeError, AttributeError):
        # find() returns None when the tag is absent: subscripting None raises
        # TypeError, while reading .text on None raises AttributeError.
        print('GET PROXY NONE TYPE')
def search_proxy():
    """Parse a saved proxy listing page and store fast proxies not yet known.

    The HTML is read from a fixed local file. Each ``<tr>`` of the second
    ``<tbody>`` is treated as one proxy: cells 0 and 1 are joined with ``:``
    to form the URL, cell 2 is the position, cell 3 is passed to
    ``get_speed`` and cell 4 is a ``%Y-%m-%d %H:%M:%S`` timestamp that is
    localized to Asia/Shanghai and converted to UTC. Rows whose speed is
    below 70 are skipped; the rest are saved only when no ``Proxy`` with the
    same URL already exists.

    Raises:
        IndexError: if the page has fewer than two ``<tbody>`` elements or a
            row has fewer than five cells.
        ValueError: if a timestamp cell does not match the expected format.
    """
    # NOTE: live fetching from http://cn-proxy.com/ is disabled here; the page
    # is read from a local file instead (presumably a saved copy of that
    # page -- TODO confirm, and make the path configurable).
    with open('/home/chenxiao/document/data', 'rt') as f:
        data = f.read()

    soup = BeautifulSoup(data, 'html.parser')
    tbody = soup.find_all('tbody')[1]
    for tr in tbody.find_all('tr'):
        td_list = tr.find_all('td')

        # Filter first so no Proxy object is built for rows we discard.
        speed = get_speed(td_list[3])
        if speed < 70:
            continue

        proxy = Proxy()
        proxy.speed = speed
        proxy.url = td_list[0].text + ":" + td_list[1].text
        proxy.position = td_list[2].text

        # The timestamp is parsed as naive, tagged as Asia/Shanghai local
        # time, then normalized to UTC for storage.
        checked_at = datetime.datetime.strptime(
            td_list[4].text, '%Y-%m-%d %H:%M:%S')
        checked_at = timezone('Asia/Shanghai').localize(checked_at)
        proxy.last_check = checked_at.astimezone(utc)

        if not Proxy.objects(url=proxy.url):
            print("加入代理服务器:   {}".format(proxy.url))
            proxy.save()