def url_list():
    """Return the list of URLs read from persistent storage.

    Thin wrapper around ``IOutils.readfile()``; returns whatever that
    helper yields (presumably a list of URL strings -- confirm against
    IOutils).
    """
    list_u = IOutils.readfile()
    return list_u
# -*- coding:utf-8 -*-
"""Batch-collect all page URLs.

Must be combined with anti-scraping countermeasures (rate limiting,
header rotation, etc.) when actually crawling.
"""
import IOutils
import get_pages
import numpy
import time
import re

# Raw index data: the repr of a Python list, e.g. "['url1', 'url2']".
index_list = IOutils.readfile()

# String clean-up: strip the list syntax characters ([ ] ') so the
# remainder can be split on commas into individual URLs.
L = index_list.replace('[', '')
L = L.replace(']', '')
L = L.replace('\'', '')
list_u = L.split(",")
# Deduplicate via set; NOTE: this discards the original ordering.
list_u = list(set(list_u))
print(list_u)
for url in list_u:
    # Re-process the URL list: match only 58.com listing pages for
    # rentals (chuzu), second-hand homes (ershoufang) or branded
    # apartments (pinpaigongyu).
    key = str(url)
    regx = r'http\:\/\/[a-z]+\.58\.com\/(chuzu|ershoufang|pinpaigongyu)\/'
    # NOTE(review): the pattern is loop-invariant and could be compiled
    # once before the loop; kept here to preserve the original
    # per-iteration bindings for any code that follows this chunk.
    pattern1 = re.compile(regx)