Ejemplo n.º 1
0
def convert_to_text(top_node):
    """Return the text of ``top_node``'s children as blank-line-separated blocks.

    For each item obtained by iterating ``top_node``, the text is fetched
    with ``parser.get_text``; items with empty/falsy text are skipped. HTML
    character references in the remaining text are unescaped, the result is
    passed through ``inner_trim``, and the pieces are joined with a blank
    line (``'\\n\\n'``) between them.

    Args:
        top_node: An iterable of nodes accepted by ``parser.get_text``.

    Returns:
        A single string; empty when no child yields any text.
    """
    # HTMLParser.unescape() was deprecated in Python 3.4 and removed in 3.9,
    # so calling it raises AttributeError on current interpreters.
    # html.unescape() is the supported replacement; fall back to the legacy
    # method only where it cannot be imported (Python 2).
    try:
        from html import unescape
    except ImportError:
        unescape = HTMLParser().unescape

    txts = []
    # list() snapshots the children before the loop starts, as the original
    # did, so the iteration is unaffected by anything get_text might do.
    for node in list(top_node):
        txt = parser.get_text(node)
        if txt:
            txts.append(inner_trim(unescape(txt)))
    return '\n\n'.join(txts)
Ejemplo n.º 2
0
def get_text(node):
    """Return the text fragments of ``node`` joined into one trimmed string.

    Every fragment yielded by ``node.itertext()`` is joined with a single
    space, leading and trailing whitespace is stripped, and the result is
    handed to ``inner_trim`` (defined elsewhere) for final clean-up.
    """
    fragments = node.itertext()
    joined = u' '.join(fragments)
    stripped = joined.strip()
    return inner_trim(stripped)