Beispiel #1
0
def get_hot_bbs_list():
    """Scrape the topic table at ``topic_url`` into a list of dicts.

    The page is fetched with ``helper.data_from_url`` and parsed with
    ``etree.HTML``.  The first ``<tr>`` of the document is skipped
    (presumably a header row -- confirm against the page markup).  Each
    remaining row yields one dict built from the ``<a>`` element found
    via ``find('a')`` in its first and second ``<td>`` cells.

    Returns:
        ``None`` when the document contains fewer than two ``<tr>``
        elements; otherwise a list of dicts with the keys ``title``,
        ``title_ref``, ``author`` and ``author_ref``.  Rows that do not
        have two ``<td>`` cells, or whose cells lack a link, are skipped
        instead of aborting the whole scrape.
    """
    page_data = helper.data_from_url(topic_url)
    root = etree.HTML(page_data)
    rows = root.xpath('//tr')
    if len(rows) < 2:
        return None

    result = []
    for row in rows[1:]:
        cells = row.findall('td')
        if len(cells) < 2:
            # Not a topic row (e.g. spacer or single-cell row); indexing
            # cells[1] here would raise IndexError.
            continue

        title_link = cells[0].find('a')
        author_link = cells[1].find('a')
        if title_link is None or author_link is None:
            # find() returns None when no matching child exists; reading
            # .text on it would raise AttributeError.
            continue

        result.append({
            'title': title_link.text,
            'title_ref': title_link.get('href'),
            'author': author_link.text,
            'author_ref': author_link.get('href'),
        })
    return result
Beispiel #2
0
def posts_from(url):
    pate_data = helper.data_from_url(url)
    root =