def url_list():
    """Return the result of ``IOutils.readfile()`` unchanged."""
    return IOutils.readfile()
Exemple #2
0
# -*- coding:utf-8 -*-
'''
Fetch all page URLs in bulk.
Needs to be combined with measures against the site's anti-crawler protection.
'''

import IOutils
import get_pages
import numpy
import time
import re

# Raw text returned by IOutils.readfile(). The cleanup below treats it as the
# printed form of a list of URLs, e.g. "['http://a/', 'http://b/']"
# (assumption -- confirm against IOutils).
index_list = IOutils.readfile()

# List character cleanup: drop brackets and quotes so that only the
# comma-separated URLs remain.
L = index_list.replace('[', '').replace(']', '').replace('\'', '')

# A printed list separates items with ", ", so every piece after the first
# carries a leading space. Strip it, otherwise the same URL can appear twice
# (with and without the space) and survive de-duplication. Empty pieces are
# dropped so an empty list does not produce a single '' entry.
# The set removes duplicates; the resulting order is arbitrary.
list_u = list(set(piece.strip() for piece in L.split(",") if piece.strip()))

# The parenthesised form prints identically under Python 2 and also parses
# under Python 3.
print(list_u)

for url in list_u:
    '''
    再次处理url列表
    '''
    key = str(url)
    regx = r'http\:\/\/[a-z]+\.58\.com\/(chuzu|ershoufang|pinpaigongyu)\/'
    pattern1 = re.compile(regx)