def ip_spider2():
    """Crawl the 89ip.cn proxy listing page by page and save each proxy.

    Pages ``index_1.html``, ``index_2.html``, ... are fetched through
    ``SuperSpider.data_search`` until a page yields no table cells.  The
    cells are consumed in groups of five: cell 0 is used as the host,
    cell 1 as the port and cell 2 as the address.  At most 15 groups are
    read per page.  Each proxy is stored with ``data_save`` as
    ``http://<host>:<port>``, and ``spider_end`` is called once the crawl
    stops.
    """
    ip = SuperSpider(
        host='192.168.0.172',
        table_name='ip_pool',
        field_list=[
            'spider_datetime', 'source_name', 'source_page', 'ip', 'address'
        ],
    )
    ip.source_name = '89免费代理'
    page = 1
    while True:
        page_url = f'http://www.89ip.cn/index_{page}.html'
        ip.source_page = page_url
        data_list = ip.data_search(
            page_url, '//table[@class="layui-table"]//td/text()')
        if not data_list:
            # An empty result is treated as "past the last page".
            break
        print(f'第{page}页')
        for i in range(0, 75, 5):
            try:
                ip_value = data_list[i].strip(' \n\t')
                ip_port = data_list[i + 1].strip(' \n\t')
                ip_address = data_list[i + 2].strip(' \n\t')
            except IndexError:
                # Fewer than 15 rows on this page: stop at the last one.
                # Only IndexError is expected here; anything else (including
                # Ctrl-C) must not be swallowed.
                break
            ip.ip = f"http://{ip_value}:{ip_port}"
            ip.address = ip_address
            ip.data_save()
            print(f'{ip.source_name}-第{page}页-{ip.ip}-导入完成')
        page += 1
        # Pause between page requests.
        time.sleep(2)
    ip.spider_end()
def ip_spider5():
    """Crawl the kxdaili.com proxy listing page by page and save each proxy.

    Pages ``ipList/1.html``, ``ipList/2.html``, ... are fetched through
    ``SuperSpider.data_search`` until a page yields no table cells.  The
    cells are consumed in groups of seven: cell 0 is used as the host,
    cell 1 as the port, cell 3 as a comma-separated list of schemes and
    cell 5 as the address.  One record is saved per scheme, formatted as
    ``<scheme-lowercased>://<host>:<port>``.  At most 10 groups are read
    per page.  ``spider_end`` is called once the crawl stops.
    """
    ip = SuperSpider(
        host='192.168.0.172',
        table_name='ip_pool',
        field_list=[
            'spider_datetime', 'source_name', 'source_page', 'ip', 'address'
        ],
    )
    ip.source_name = '开心代理'
    page = 1
    while True:
        page_url = f'http://ip.kxdaili.com/ipList/{page}.html#ip'
        ip.source_page = page_url
        data_list = ip.data_search(
            page_url, '//table[@class="ui table segment"]//td/text()')
        if not data_list:
            # An empty result is treated as "past the last page".
            break
        # The highest index read per group is i + 5, so stop before it would
        # run past the end; a short page then ends cleanly instead of raising
        # IndexError.  The cap of 70 keeps the original 10-row limit.
        for i in range(0, min(70, len(data_list) - 5), 7):
            ip.address = data_list[i + 5]
            for h in data_list[i + 3].split(','):
                ip.ip = f'{h.lower()}://{data_list[i]}:{data_list[i+1]}'
                ip.data_save()
                print(f'{ip.source_name}-第{page}页-{ip.ip}-导入完成')
        page += 1
    ip.spider_end()
def ip_spider7():
    """Crawl every listing page of 66ip.cn and save each proxy.

    The page count is read from the second-to-last link inside the
    ``PageList`` element of the index page.  Each page ``<n>.html`` is then
    fetched (passing ``'gbk'`` as the third ``data_search`` argument), the
    first five text nodes are dropped, and the rest is consumed in groups
    of five: cell 0 is used as the host, cell 1 as the port and cell 2 as
    the address.  Each proxy is stored with ``data_save`` as
    ``http://<host>:<port>``; ``spider_end`` is called after the last page.

    Raises:
        IndexError: if the index page yields no pagination link.
        ValueError: if the pagination text is not an integer.
    """
    ip = SuperSpider(
        host='192.168.0.172',
        table_name='ip_pool',
        field_list=[
            'spider_datetime', 'source_name', 'source_page', 'ip', 'address'
        ],
    )
    ip.source_name = '66代理'
    page_all = ip.data_search(
        'http://www.66ip.cn/index.html',
        '//div[@id="PageList"]//a[last()-1]/text()')[0]
    for page in range(1, int(page_all) + 1):
        page_url = f'http://www.66ip.cn/{page}.html'
        ip.source_page = page_url
        # [5:] drops the first five text nodes (presumably the table header
        # row -- confirm against the live markup).
        data_list = ip.data_search(
            page_url,
            '//div[@class="containerbox boxindex"]//table//tr//text()',
            'gbk')[5:]
        # The highest index read per group is i + 2.  Bounding the range by
        # the actual length replaces the former "count to 10000 and swallow
        # whatever is raised" loop, so storage errors from data_save are no
        # longer mistaken for the end of the page.
        for i in range(0, len(data_list) - 2, 5):
            ip.ip = f'http://{data_list[i]}:{data_list[i+1]}'
            ip.address = data_list[i + 2]
            ip.data_save()
            print(f'{ip.source_name}-第{page}页-{ip.ip}-导入完成')
    ip.spider_end()
def ip_spider4():
    """Scrape the single proxy table at ip.seofangfa.com and save each proxy.

    The table cells returned by ``SuperSpider.data_search`` are consumed in
    groups of five: cell 0 is used as the host, cell 1 as the port and
    cell 3 as the address.  At most 50 groups are read.  Each proxy is
    stored with ``data_save`` as ``http://<host>:<port>``, and
    ``spider_end`` is called afterwards.
    """
    ip = SuperSpider(
        host='192.168.0.172',
        table_name='ip_pool',
        field_list=[
            'spider_datetime', 'source_name', 'source_page', 'ip', 'address'
        ],
    )
    ip.source_name = '方法SEO'
    page_url = 'https://ip.seofangfa.com/'
    ip.source_page = page_url
    data_list = ip.data_search(page_url, '//table[@class="table"]//td/text()')
    # The highest index read per group is i + 3.  Clamping to the number of
    # cells actually returned lets a table with fewer than 50 rows finish
    # normally (and reach spider_end) instead of raising IndexError.
    for i in range(0, min(250, len(data_list) - 3), 5):
        ip.ip = f'http://{data_list[i]}:{data_list[i+1]}'
        ip.address = data_list[i + 3]
        ip.data_save()
        print(f'{ip.source_name}-{ip.ip}-导入完成')
    ip.spider_end()
def ip_spider1():
    """Crawl pages 1-99 of the kuaidaili.com free listing and save each proxy.

    Each page ``/free/inha/<n>/`` is fetched through
    ``SuperSpider.data_search``.  The table cells are consumed in groups of
    seven: cell 0 is used as the host, cell 1 as the port and cell 4 as the
    address.  At most 15 groups are read per page.  Each proxy is stored
    with ``data_save`` as ``http://<host>:<port>``.  The function sleeps
    10 seconds after every page and calls ``spider_end`` at the end.
    """
    ip = SuperSpider(
        host='192.168.0.172',
        table_name='ip_pool',
        field_list=[
            'spider_datetime', 'source_name', 'source_page', 'ip', 'address'
        ],
    )
    ip.source_name = '快代理'
    for page in range(1, 100):
        print(f'第{page}页')
        page_url = f'https://www.kuaidaili.com/free/inha/{page}/'
        ip.source_page = page_url
        data_list = ip.data_search(
            page_url,
            '//table[@class="table table-bordered table-striped"]//td/text()')
        for i in range(0, 105, 7):
            try:
                proxy_url = f'http://{data_list[i]}:{data_list[i+1]}'
                address = data_list[i + 4]
            except (IndexError, TypeError):
                # IndexError: the page has fewer than 15 rows.
                # TypeError: the result is not subscriptable (e.g. None);
                # whether data_search can return that is not visible here,
                # but it was tolerated before, so it still is.
                # Anything else, including Ctrl-C, now propagates.
                break
            ip.ip = proxy_url
            ip.address = address
            ip.data_save()
            print(f'{ip.source_name}-第{page}页-{ip.ip}-导入完成')
        # Pause between page requests.
        time.sleep(10)
    ip.spider_end()