Ejemplo n.º 1
0
def ip_spider2():
    """Crawl the paginated proxy list on www.89ip.cn.

    Pages ``index_1.html``, ``index_2.html``, ... are requested in order
    until ``data_search`` returns an empty/falsy result. The returned cell
    texts are consumed five per table row: cell 0 is used as the host,
    cell 1 as the port and cell 2 as the address. At most 15 rows (75
    cells) are read from a page. ``ip.data_save()`` is called once per
    complete row and ``ip.spider_end()`` once after the last page.

    A trailing row that lacks any of its three required cells is skipped.
    The row loop is bounded by the number of cells instead of relying on a
    bare ``except:``, so ``KeyboardInterrupt`` and unrelated errors are no
    longer swallowed.
    """
    cells_per_row = 5
    max_cells = 75  # upper limit kept from the original: 15 rows per page

    ip = SuperSpider(host='192.168.0.172',
                     table_name='ip_pool',
                     field_list=[
                         'spider_datetime', 'source_name', 'source_page', 'ip',
                         'address'
                     ])
    ip.source_name = '89免费代理'
    page = 1
    while True:
        page_url = f'http://www.89ip.cn/index_{page}.html'
        ip.source_page = page_url
        data_list = ip.data_search(
            page_url, '//table[@class="layui-table"]//td/text()')
        if not data_list:
            break
        print(f'第{page}页')
        # A row starting at i needs cells i, i+1 and i+2, so the last valid
        # start index is len(data_list) - 3.
        row_limit = min(max_cells, len(data_list) - 2)
        for i in range(0, row_limit, cells_per_row):
            ip_value = data_list[i].strip(' \n\t')
            ip_port = data_list[i + 1].strip(' \n\t')
            ip.ip = f"http://{ip_value}:{ip_port}"
            ip.address = data_list[i + 2].strip(' \n\t')
            ip.data_save()
            print(f'{ip.source_name}-第{page}页-{ip.ip}-导入完成')
        page += 1
        time.sleep(2)  # pause between page requests
    ip.spider_end()
Ejemplo n.º 2
0
def ip_spider5():
    """Crawl the paginated proxy list on ip.kxdaili.com.

    Pages ``ipList/1.html``, ``ipList/2.html``, ... are requested in order
    until ``data_search`` returns an empty/falsy result. The returned cell
    texts are consumed seven per table row: cell 0 is used as the host,
    cell 1 as the port, cell 3 as a comma-separated list of schemes and
    cell 5 as the address. At most 10 rows (70 cells) are read per page.
    One record is saved per scheme via ``ip.data_save()``, and
    ``ip.spider_end()`` is called once after the last page.

    The row loop is bounded by the number of cells actually returned. The
    original fixed ``range(0, 70, 7)`` raised ``IndexError`` on any page
    with fewer than ten complete rows, aborting the crawl before
    ``spider_end()`` ran.
    """
    cells_per_row = 7
    max_cells = 70  # upper limit kept from the original: 10 rows per page

    ip = SuperSpider(host='192.168.0.172',
                     table_name='ip_pool',
                     field_list=[
                         'spider_datetime', 'source_name', 'source_page', 'ip',
                         'address'
                     ])
    ip.source_name = '开心代理'
    page = 1
    while True:
        page_url = f'http://ip.kxdaili.com/ipList/{page}.html#ip'
        ip.source_page = page_url
        data_list = ip.data_search(
            page_url, '//table[@class="ui table segment"]//td/text()')
        if not data_list:
            break
        # A row starting at i reads up to cell i+5, so the last valid start
        # index is len(data_list) - 6.
        row_limit = min(max_cells, len(data_list) - 5)
        for i in range(0, row_limit, cells_per_row):
            ip.address = data_list[i + 5]
            # One row may list several schemes; save one record per scheme.
            for h in data_list[i + 3].split(','):
                ip.ip = f'{h.lower()}://{data_list[i]}:{data_list[i+1]}'
                ip.data_save()
                print(f'{ip.source_name}-第{page}页-{ip.ip}-导入完成')
        page += 1
    ip.spider_end()
Ejemplo n.º 3
0
def ip_spider7():
    """Crawl the paginated proxy list on www.66ip.cn.

    The page count is taken from the first result of the pagination XPath
    on ``index.html`` and converted with ``int``; pages ``1.html`` through
    that number are then requested with the ``'gbk'`` argument passed to
    ``data_search``. The first five text nodes of every page are dropped
    (presumably the table header row -- confirm against the site) and the
    rest are consumed five per row: cell 0 is used as the host, cell 1 as
    the port and cell 2 as the address. ``ip.data_save()`` is called once
    per complete row and ``ip.spider_end()`` once at the end.

    Changes from the earlier version:
      * Rows are bounded by the number of cells returned instead of looping
        to a 10000 sentinel and breaking out through a bare ``except:``.
      * The bare ``except:`` also wrapped ``data_save()``, so a failing
        save silently ended the page. Such errors now propagate, as they
        do in the sibling spiders.

    NOTE(review): if the pagination query yields nothing, the ``[0]``
    lookup fails before any page is crawled (unchanged behaviour).
    """
    cells_per_row = 5

    ip = SuperSpider(host='192.168.0.172',
                     table_name='ip_pool',
                     field_list=[
                         'spider_datetime', 'source_name', 'source_page', 'ip',
                         'address'
                     ])
    ip.source_name = '66代理'
    page_all = ip.data_search('http://www.66ip.cn/index.html',
                              '//div[@id="PageList"]//a[last()-1]/text()')[0]
    for page in range(1, int(page_all) + 1):
        page_url = f'http://www.66ip.cn/{page}.html'
        data_list = ip.data_search(
            page_url,
            '//div[@class="containerbox boxindex"]//table//tr//text()',
            'gbk')[5:]
        ip.source_page = page_url
        # A row starting at i needs cells i, i+1 and i+2, so the last valid
        # start index is len(data_list) - 3.
        for i in range(0, len(data_list) - 2, cells_per_row):
            ip.ip = f'http://{data_list[i]}:{data_list[i+1]}'
            ip.address = data_list[i + 2]
            ip.data_save()
            print(f'{ip.source_name}-第{page}页-{ip.ip}-导入完成')
    ip.spider_end()
Ejemplo n.º 4
0
def ip_spider4():
    """Crawl the single-page proxy list on ip.seofangfa.com.

    The table cell texts returned by ``data_search`` are consumed five per
    row: cell 0 is used as the host, cell 1 as the port and cell 3 as the
    address. At most 50 rows (250 cells) are read. ``ip.data_save()`` is
    called once per complete row and ``ip.spider_end()`` once at the end.

    The row loop is bounded by the number of cells actually returned. The
    original fixed ``range(0, 250, 5)`` raised ``IndexError`` whenever the
    page listed fewer than 50 complete rows, so ``spider_end()`` never ran.
    An empty/falsy result is treated as "no rows", as the paginated sibling
    spiders do.
    """
    cells_per_row = 5
    max_cells = 250  # upper limit kept from the original: 50 rows

    ip = SuperSpider(host='192.168.0.172',
                     table_name='ip_pool',
                     field_list=[
                         'spider_datetime', 'source_name', 'source_page', 'ip',
                         'address'
                     ])
    ip.source_name = '方法SEO'
    ip.source_page = 'https://ip.seofangfa.com/'
    data_list = ip.data_search('https://ip.seofangfa.com/',
                               '//table[@class="table"]//td/text()')
    cells = data_list or []
    # A row starting at i reads up to cell i+3, so the last valid start
    # index is len(cells) - 4.
    row_limit = min(max_cells, len(cells) - 3)
    for i in range(0, row_limit, cells_per_row):
        ip.ip = f'http://{cells[i]}:{cells[i+1]}'
        ip.address = cells[i + 3]
        ip.data_save()
        print(f'{ip.source_name}-{ip.ip}-导入完成')
    ip.spider_end()
Ejemplo n.º 5
0
def ip_spider1():
    """Crawl pages 1-99 of the proxy list on www.kuaidaili.com.

    For each page the table cell texts returned by ``data_search`` are
    consumed seven per row: cell 0 is used as the host, cell 1 as the port
    and cell 4 as the address. At most 15 rows (105 cells) are read per
    page. ``ip.data_save()`` is called once per complete row, the function
    sleeps 10 seconds after every page, and ``ip.spider_end()`` is called
    once at the end.

    A page that returns nothing, or a trailing row missing a required cell,
    simply contributes no record. The row loop is bounded by the number of
    cells instead of relying on a bare ``except:``, so ``KeyboardInterrupt``
    and unrelated errors are no longer swallowed.
    """
    cells_per_row = 7
    max_cells = 105  # upper limit kept from the original: 15 rows per page

    ip = SuperSpider(host='192.168.0.172',
                     table_name='ip_pool',
                     field_list=[
                         'spider_datetime', 'source_name', 'source_page', 'ip',
                         'address'
                     ])
    ip.source_name = '快代理'
    for page in range(1, 100):
        print(f'第{page}页')
        page_url = f'https://www.kuaidaili.com/free/inha/{page}/'
        ip.source_page = page_url
        data_list = ip.data_search(
            page_url,
            '//table[@class="table table-bordered table-striped"]//td/text()')
        # The original bare except also tolerated an empty/falsy result;
        # treat it explicitly as a page with no rows.
        cells = data_list or []
        # A row starting at i reads up to cell i+4, so the last valid start
        # index is len(cells) - 5.
        row_limit = min(max_cells, len(cells) - 4)
        for i in range(0, row_limit, cells_per_row):
            ip.ip = f'http://{cells[i]}:{cells[i+1]}'
            ip.address = cells[i + 4]
            ip.data_save()
            print(f'{ip.source_name}-第{page}页-{ip.ip}-导入完成')
        time.sleep(10)  # pause between page requests
    ip.spider_end()