def html2text(html):
    """Convert HTML to text, rejoining URLs that were split across lines.

    The HTML is first converted with ``html2text_orig``. Every span of the
    converted text matched by ``link_re`` then has its newline characters
    removed; all text outside those spans is copied through unchanged.
    """
    converted = html2text_orig(html)
    buf = StringIO()
    cursor = 0
    for match in link_re.finditer(converted):
        begin, end = match.span()
        # Text between the previous link (or the start) and this link.
        buf.write(converted[cursor:begin])
        # The link itself, with the line breaks taken out.
        buf.write(match.group().replace("\n", ""))
        cursor = end
    # Whatever follows the last link.
    buf.write(converted[cursor:])
    return buf.getvalue()
def html2text(html):
    """Convert HTML to text and undo newlines that cut URLs apart.

    The HTML is converted with ``html2text_orig``; each span of the result
    matched by ``LINK_RE`` is then replaced by the same text with its
    newline characters removed. Text outside the matches is left as is.

    This is a workaround that is needed until
    https://github.com/aaronsw/html2text/issues/#issue/7 is fixed.
    """
    def _strip_newlines(match):
        # Replacement for one matched link: same text, no line breaks.
        return match.group().replace('\n', '')

    return LINK_RE.sub(_strip_newlines, html2text_orig(html))