コード例 #1
0
def parse_thread_page(el: bs4.element.Tag) -> AttrDict:
    """Collect the user, body and date fields from a parsed element.

    Args:
        el: Tag searched with the CSS selectors ``.postprofile dt``,
            ``.content`` and ``.postbody .author``.
            NOTE(review): presumably the container of one forum post --
            confirm against the caller.

    Returns:
        An ``AttrDict`` with ``user``, ``body_html``, ``body_text`` and
        ``date``. All values are whitespace-stripped strings; ``date`` is
        the raw text of the matched element, not a parsed date.

    Raises:
        IndexError: If any of the selectors matches nothing.
    """
    # Only the first match of each selector is used.
    profile_node = el.select('.postprofile dt')[0]
    content_node = el.select('.content')[0]
    author_node = el.select('.postbody .author')[0]

    post = AttrDict()
    post.user = profile_node.text.strip()
    # Same node, two views: serialized markup and plain text.
    post.body_html = str(content_node).strip()
    post.body_text = content_node.text.strip()
    post.date = author_node.text.strip()
    return post
コード例 #2
0
def parse_link(link: bs4.element.Tag, domain: str) -> AttrDict:
    """Extract title, counters, date and URL from a listing entry.

    Args:
        link: Tag searched with the CSS selectors ``a:nth-of-type(1)``,
            ``a:nth-of-type(3)``, ``.views`` and ``.posts``.
            NOTE(review): presumably one topic row of a forum listing --
            confirm against the caller.
        domain: Prefix prepended verbatim to the entry's href. It should
            not end with ``/`` because the rewritten href starts with one.

    Returns:
        An ``AttrDict`` with ``title``, ``views``, ``answers``, ``date``
        and ``url``. ``views`` and ``answers`` are stripped strings (not
        converted to ``int``) with the labels ``Zugriffe`` / ``Antworten``
        removed.

    Raises:
        IndexError: If any of the selectors matches nothing.
        KeyError: If the first anchor has no ``href`` attribute.
    """
    # The first anchor supplies both the title and the target URL.
    title_anchor = link.select('a:nth-of-type(1)')[0]

    out = AttrDict()
    out.title = title_anchor.text
    out.views = link.select('.views')[0].text.replace('Zugriffe', '').strip()
    out.answers = link.select('.posts')[0].text.replace('Antworten', '').strip()
    out.date = link.select('a:nth-of-type(3)')[0].text

    href = title_anchor.attrs['href']
    # Turn only a leading "./" into "/". A blanket str.replace would also
    # rewrite "../" segments and any "./" inside the path or query string.
    if href.startswith('./'):
        href = href[1:]
    out.url = domain + href
    return out