def synchronize(self):
    """Refresh the derived ``snippet`` and ``lang`` fields from ``self.content``.

    Calls the parent class's ``synchronize()`` first, then converts the HTML
    in ``self.content`` to plain text, stores the first 128 characters of
    that text in ``self.snippet``, and sets ``self.lang`` to ``'cn'`` when
    the number of ``chinese_character_pattern`` matches exceeds 10% of the
    text length. ``self.lang`` is left untouched otherwise.
    """
    super(BlogEntry, self).synchronize()
    text_content = TextConverter.html_to_text(self.content)
    self.snippet = text_content[:128]

    # Empty text has nothing to classify; bailing out here also avoids a
    # ZeroDivisionError in the ratio below.
    if not text_content:
        return

    chinese_count = len(chinese_character_pattern.findall(text_content))
    # float() keeps this a true division on Python 2 as well.
    if float(chinese_count) / len(text_content) > 0.1:
        self.lang = 'cn'
def test_html_to_text(self):
    """html_to_text should render HTML as plain text, as if copied from a browser."""
    expectations = (
        ("<p>aaa<br/>\nbbb</p>", "\naaa\n bbb\n"),
        ("<div>aaa<br/>\nbbb</div>", "aaa\n bbb\n"),
        ("<div>aaa\n\n\n\nbbb</div>", "aaa bbb\n"),
    )
    for markup, plain in expectations:
        assert TextConverter.html_to_text(markup) == plain
def test_text_to_html(self):
    """text_to_html should turn line breaks into <br/> tags and blank lines into paragraphs."""
    expectations = (
        ("aaa\nbbb", "<p>aaa<br/>\nbbb</p>"),
        ("aaa\n\nbbb", "<p>aaa</p>\n\n<p>bbb</p>"),
        ("aaa\n\n\n\nbbb", "<p>aaa</p>\n\n<p>bbb</p>"),
    )
    for plain, markup in expectations:
        assert TextConverter.text_to_html(plain) == markup
def synchronize(self):
    """Refresh the derived ``snippet`` and ``lang`` fields from ``self.content``.

    Calls the parent class's ``synchronize()`` first, then converts the HTML
    in ``self.content`` to plain text, stores the first 128 characters of
    that text in ``self.snippet``, and sets ``self.lang`` to ``'cn'`` when
    the number of ``chinese_character_pattern`` matches exceeds 10% of the
    text length. ``self.lang`` is left untouched otherwise.
    """
    super(BlogEntry, self).synchronize()
    text_content = TextConverter.html_to_text(self.content)
    self.snippet = text_content[:128]

    # Empty text has nothing to classify; bailing out here also avoids a
    # ZeroDivisionError in the ratio below.
    if not text_content:
        return

    chinese_count = len(chinese_character_pattern.findall(text_content))
    # float() keeps this a true division on Python 2 as well.
    if float(chinese_count) / len(text_content) > 0.1:
        self.lang = 'cn'