def getAllProxy(pool_size=10, thread_or_process=True, is_refash=True):
    """Refresh the local SQLite proxy pool and validate every stored proxy.

    Parameters
    ----------
    pool_size : int
        Number of workers used for the availability check.
    thread_or_process : bool
        True -> validate with a thread pool, False -> with a process pool.
    is_refash : bool
        True  -> always rebuild the ip_house table from scratch.
        False -> rebuild only when the table is currently empty.
    """
    Log.v('正在更新ip池,请稍后...')
    # address = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + '/'

    def _rebuild_pool():
        # Fetch the raw proxy list, de-duplicate it, and rewrite ip_house,
        # then crawl the free-proxy website for additional entries.
        proxys = list(set(GetFreeProxy.get_list_proxys()))
        # Single connection for both the DELETE and the INSERTs
        # (the original opened two and leaked the first).
        sqlite = Sqlite(address + 'ip.db')
        sqlite.update_data('DELETE FROM ip_house')
        for i, proxy in enumerate(proxys):
            if proxy and GetFreeProxy.verifyProxyFormat(proxy):
                sqlite.cursor.execute(
                    "INSERT INTO ip_house VALUES (?,?,?,?);",
                    [i + 1, proxy, 'HTTP', 'true'])
        sqlite.conn.commit()
        sqlite.close_conn()
        GetFreeProxy.get_proxys_by_website()

    if is_refash:
        _rebuild_pool()
    else:
        # Rebuild only when ip_house holds no rows.
        sqlite = Sqlite(address + 'ip.db')
        results = sqlite.query_data(
            'select count(proxy_adress) from ip_house')
        sqlite.close_conn()  # close the count-query connection (was leaked)
        if int(results[0][0]) == 0:
            _rebuild_pool()

    # Load every stored proxy and check availability concurrently.
    sqlite = Sqlite(address + 'ip.db')
    results = sqlite.query_data(
        'select id,proxy_adress,type from ip_house')
    sqlite.close_conn()
    # Pack each row as "id&&address&&type" for the worker pool.
    params = [str(row[0]) + '&&' + row[1] + '&&' + row[2] for row in results]
    Log.v("发现ip代理数量:" + str(len(params)))
    Log.v('正在检查ip可用性...')
    if thread_or_process:
        GetFreeProxy.exec_multi_threading(pool_size, params)
    else:
        GetFreeProxy.exec_multi_process(pool_size, params)
    Log.v('更新完成')
def get_proxys_by_website():
    """Scrape xicidaili.com proxy-list pages and append new rows to ip_house.

    Crawls four list pages (high-anonymity, ordinary, HTTPS, HTTP),
    extracts ip:port and protocol type from every table row,
    de-duplicates, then inserts the entries after the rows already
    present in the table.
    """
    import time
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Upgrade-Insecure-Requests': '1',
        'Host': 'www.xicidaili.com',
        'Pragma': 'no-cache',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Accept-Language': 'zh-CN,zh;q=0.9'
    }
    # The four list pages previously fetched by copy-pasted stanzas.
    pages = ('nn/1', 'nt/', 'wn/', 'wt/')
    trs_list = []
    for idx, page in enumerate(pages):
        if idx:
            time.sleep(1)  # pause between requests, as the original did
        response = requests.get('https://www.xicidaili.com/' + page,
                                headers=headers, timeout=20)
        html = BeautifulSoup(response.text, 'html.parser')
        table = html.find('table', attrs={'id': 'ip_list'})
        trs_list.extend(table.find_all('tr')[1:])  # skip the header row
    # Build "ip:port&TYPE" entries; a set de-duplicates as we go.
    # (Dropped the dead date_time/now computation that fed only a
    # commented-out freshness filter.)
    proxy_set = set()
    for tr in trs_list:
        tds = tr.find_all('td')
        ip = tds[1].get_text()
        port = tds[2].get_text()
        proto = str(tds[5].get_text()).strip(' ')
        proxy_set.add(ip + ":" + port + '&' + proto)
    proxys = list(proxy_set)
    # One connection for the count query and the inserts
    # (the original opened two and leaked the first).
    sqlite = Sqlite(address + 'ip.db')
    results = sqlite.query_data('select count(proxy_adress) from ip_house')
    num = int(results[0][0])
    for i, proxy in enumerate(proxys):
        paras = proxy.split('&')
        if paras[0] and GetFreeProxy.verifyProxyFormat(paras[0]):
            # num + i + 1 continues the 1-based ids written by the sibling
            # inserter; the original's num + i collided with the last
            # existing id. NOTE(review): still assumes ids are gap-free —
            # confirm against the table's actual contents.
            sqlite.cursor.execute(
                "INSERT INTO ip_house VALUES (?,?,?,?);",
                [num + i + 1, paras[0], paras[1], 'true'])
    sqlite.conn.commit()
    sqlite.close_conn()