def get_hot_bbs_list():
    """Download the page at ``topic_url`` and extract its topic rows.

    The page is parsed as HTML and every ``<tr>`` after the first one is
    treated as a topic entry (the first row is skipped -- presumably a
    table header; confirm against the live page).

    Returns:
        ``None`` when the page contains fewer than two ``<tr>`` elements.
        Otherwise a list of dicts, one per usable row, with the keys
        ``'title'``, ``'title_ref'``, ``'author'`` and ``'author_ref'``.
        The ``*_ref`` values are the raw ``href`` attributes of the links.

    Rows that do not have at least two ``<td>`` cells, or whose first two
    cells do not both contain an ``<a>`` element, are skipped instead of
    aborting the whole listing.
    """
    page_data = helper.data_from_url(topic_url)
    # NOTE(review): an earlier revision had a commented-out gbk -> utf-8
    # re-encoding step here; confirm helper.data_from_url already returns
    # data lxml can decode correctly.
    root = etree.HTML(page_data)

    rows = root.xpath('//tr')
    if len(rows) < 2:
        return None

    result = []
    for row in rows[1:]:
        cells = row.findall('td')
        if len(cells) < 2:
            # Not a topic row (e.g. spacer or <th>-only row).
            continue

        title_link = cells[0].find('a')
        author_link = cells[1].find('a')
        if title_link is None or author_link is None:
            # find() returns None when the cell has no direct <a> child.
            continue

        result.append({
            'title': title_link.text,
            'title_ref': title_link.get('href'),
            'author': author_link.text,
            'author_ref': author_link.get('href'),
        })
    return result
def posts_from(url): pate_data = helper.data_from_url(url) root =