def cleanup(self, text, html=True, trim=True):
    """Optionally strip markup from ``text`` and trim each of its lines.

    When ``html`` is true the text is passed through ``strip_tags`` and then
    ``self.unescape``.  When ``trim`` is true every line is stripped of
    leading/trailing whitespace and the lines are re-joined with ``"\\n"``.
    The resulting text is returned.
    """
    # NOTE(review): unescaping is grouped with tag stripping under ``html``;
    # confirm that it is not meant to run unconditionally.
    if html:
        text = self.unescape(strip_tags(text))
    if trim:
        trimmed_lines = [line.strip() for line in text.splitlines()]
        text = "\n".join(trimmed_lines)
    return text
def convert_text(value):
    """Return text with only <br/> intact, all other tags stripped.

    Returns ``colander.null`` when the conversion raises an exception.
    """
    # Essentially: convert newlines to <br/>, let strip_tags turn <br/> into
    # newlines while removing everything else, then nl2br turns those
    # newlines into <br/> again.
    try:
        return nl2br(strip_tags(nl2br(value)))
    except Exception:
        # Best-effort conversion: any ordinary failure maps to colander.null.
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt and
        # SystemExit still propagate.
        return colander.null
def html_string_validator(node, value):
    """Check that the input does not contain HTML tags.

    The value and its ``strip_tags`` result are both whitespace-normalised
    (line breaks become spaces, whitespace runs collapse to a single space)
    and then compared.

    Raises:
        colander.Invalid: if the two normalised strings differ.
    """
    def squash(raw):
        # Replace line breaks with spaces, then collapse whitespace runs.
        return ' '.join(re.sub(r"\r?\n", " ", raw).split())

    without_tags = squash(strip_tags(value))
    as_given = squash(value)

    # Any difference left after normalisation came from tag stripping.
    if without_tags != as_given:
        raise colander.Invalid(node, _(u"HTML is not allowed."))
def strip_and_truncate(text, limit=200, symbol='<span class="trunc">…</span>'):
    """Strip tags from ``text`` and cut it off at a word boundary.

    Space-separated words are taken from the stripped text until the
    collected length (each word plus one space) reaches ``limit``; the word
    that crosses the limit is kept whole.  ``symbol`` is appended when any
    text is left over.  If ``strip_tags`` raises, the exception is logged
    and a fixed error message is returned instead.
    """
    try:
        plain = strip_tags(text)
    except Exception:
        logger.exception('strip_tags caused exception:')
        return u"Error: could not truncate text"

    words = []
    used = 0  # characters collected so far, counting one space per word
    remainder = plain
    while remainder and used < limit:
        head, _sep, remainder = remainder.partition(' ')
        words.append(head)
        used += len(head) + 1

    result = ' '.join(words).strip()
    if remainder:
        # Something was cut off, so mark the truncation.
        result += symbol
    return result
def remove_formatting(string):
    """Simplify HTML text by removing tags and several kinds of formatting.

    The input goes through ``strip_tags``, ``convert_accented_entities`` and
    ``convert_misc_entities``, in that order, and the result is finally
    passed to ``collapse``.

    If the ``unidecode`` package is installed, it will also transliterate
    non-ASCII Unicode characters to their nearest pronunciation equivalent
    in ASCII.

    Based on Ruby's stringex package
    (http://github.com/rsl/stringex/tree/master)
    """
    simplified = convert_misc_entities(
        convert_accented_entities(strip_tags(string)))
    # simplified = convert_misc_characters(simplified)
    if unidecode:
        simplified = unidecode(simplified)
    return collapse(simplified)
def remove_formatting(string):
    """Reduce HTML text to plain, collapsed text.

    Steps, in order: ``strip_tags``, ``convert_accented_entities``,
    ``convert_misc_entities``, an optional ``unidecode`` pass, and finally
    ``collapse``.

    If the ``unidecode`` package is installed, non-ASCII Unicode characters
    are also transliterated to their nearest pronunciation equivalent in
    ASCII.

    Based on Ruby's stringex package
    (http://github.com/rsl/stringex/tree/master)
    """
    without_tags = strip_tags(string)
    without_accents = convert_accented_entities(without_tags)
    plain = convert_misc_entities(without_accents)
    # plain = convert_misc_characters(plain)
    ascii_text = unidecode(plain) if unidecode else plain
    return collapse(ascii_text)
def brief(self):
    """Return a shortened, tag-free form of ``self.wikitext_docs``.

    The docs are passed through ``strip_tags``; when the result is longer
    than 300 characters, ``truncate(text, 300)`` is returned.  Otherwise an
    empty string is returned.
    """
    plain = strip_tags(self.wikitext_docs)
    if len(plain) <= 300:
        # NOTE(review): short docs yield '' rather than the text itself;
        # confirm callers rely on that.
        return ''
    return truncate(plain, 300)
def excerpt(self):
    """Return ``self.body`` with tags stripped and truncated.

    The body is wrapped in ``literal`` before ``strip_tags`` is applied; the
    result is truncated with ``length=300`` and ``whole_word=True``.
    """
    plain_body = strip_tags(literal(self.body))
    return truncate(plain_body, length=300, whole_word=True)
def render_readonly(self, **kwargs):
    """Render the read-only value with tags stripped and truncated to 30.

    Delegates to the parent renderer's ``render_readonly``; a falsy result
    is returned as ``''``.
    """
    rendered = super(EllipsysFieldRenderer, self).render_readonly(**kwargs)
    if not rendered:
        return ''
    return text.truncate(strip_tags(rendered), 30)
def appstruct(self):
    """Return the parent form's appstruct with tags stripped from ``body``."""
    data = super(EditCommentForm, self).appstruct()
    # It would be nice if this was handled by colander instead :/
    plain_body = strip_tags(data['body'])
    data['body'] = plain_body
    return data
def test_compare_strip_tags_to_sanitize(self):
    """``strip_tags`` and ``render.sanitize`` agree on markup with a script."""
    markup = u'I <i>really</i> like <script language="javascript">NEFARIOUS CODE</script> steak!'
    stripped = strip_tags(markup)
    sanitized = render.sanitize(markup)
    eq_(stripped, sanitized)