def downloadMain():
    """Request every listing page of the movie index in order.

    Pages 1 through 143 of the ``list_23_<n>.html`` listing are fetched
    one at a time through ``tools.getHtml``.

    NOTE(review): in the code visible here neither the compiled regex nor
    the fetched HTML is used afterwards -- the function looks truncated or
    unfinished; confirm what is supposed to consume each page.
    """
    page_template = 'http://www.ygdy8.net/html/gndy/dyzz/list_23_%d.html'
    # Compiled up front, but not referenced anywhere in the visible body.
    anchor_re = re.compile(r'</a>>')
    for page_no in range(1, 144):
        page_url = page_template % page_no
        # The response is bound to a local and then dropped (see NOTE above).
        page_html = tools.getHtml(page_url)
def saveAndAnalyzeDate(content):
    """Walk the entries of a listing page and pull two fields from each.

    ``content`` is handed to ``tools.pq`` and every element matching
    ``.co_content8 tbody`` is visited. For each one the ``href`` of its
    ``a`` element is read and decoded from GBK, and the text of its
    ``font[color="#8F8C89"]`` element is read.

    Args:
        content: Page markup accepted by ``tools.pq`` -- presumably the
            HTML returned by ``tools.getHtml``; verify against the caller.

    NOTE(review): the two extracted values are not returned or stored in
    the code visible here, so the function may be truncated. The
    ``.decode('gbk')`` call assumes the ``href`` value is a byte string --
    TODO confirm the Python version / return type of ``attr``.
    """
    site_root = 'http://www.ygdy8.net'  # not referenced in the visible body
    wrap = tools.pq
    document = tools.pq(content)
    for node in document('.co_content8 tbody'):
        entry = wrap(node)
        anchor = entry.find('a')
        href = anchor.attr('href').decode('gbk')
        grey_font = entry.find('font[color="#8F8C89"]')
        caption = grey_font.text()