def test_remove_nasties(self):
    """Check sanitized_list against a string full of punctuation and shell noise.

    The first assertion pins the word list expected with the default
    arguments: lower-cased words, with the punctuation, the newline and the
    '/*' token gone. The second passes '[^a]' as the second argument and
    expects only the three 'a' characters to survive.
    """
    hostile_text = "This string! has 9 (&#$# all \n 'sudo rm -rf /*' nasty stuff"
    expected_words = [
        'this',
        'string',
        'has',
        '9',
        'all',
        'sudo',
        'rm',
        '-rf',
        'nasty',
        'stuff',
    ]
    self.assertEqual(sanitized_list(hostile_text), expected_words)

    # NOTE(review): the second argument looks like a regex of characters to
    # discard -- confirm against sanitized_list's definition.
    expected_only_a = ['a', 'a', 'a']
    self.assertEqual(sanitized_list(hostile_text, '[^a]'), expected_only_a)
def _findText(self, elem, tag_name):
    """Gather the sanitized words found in the text beneath a DOM node.

    A text node contributes whatever ``sanitized_list`` produces for its
    ``data``. Any other node contributes the concatenated results of its
    children, visited in order. ``tag_name`` is not inspected here; it is
    only forwarded to the recursive calls.

    Returns a new list (empty when the node has no text descendants).
    """
    # Leaf case: the node itself carries the text.
    if elem.nodeType == elem.TEXT_NODE:
        return list(sanitized_list(elem.data))

    collected = []
    if elem.hasChildNodes():
        for child in elem.childNodes:
            collected += self._findText(child, tag_name)
    return collected
def _findText(self, elem, tag_name):
    """Return the sanitized words from every text node at or below ``elem``.

    Text nodes are passed through ``sanitized_list``; other nodes are
    walked depth-first and their children's words are flattened into one
    list, preserving document order. ``tag_name`` is simply handed on to
    each recursive call and is otherwise unused in this method.
    """
    if elem.nodeType == elem.TEXT_NODE:
        return list(sanitized_list(elem.data))

    # Nothing to descend into: no words.
    if not elem.hasChildNodes():
        return []

    return [
        word
        for child in elem.childNodes
        for word in self._findText(child, tag_name)
    ]
def test_unicode(self):
    """Check that sanitized_list splits a unicode literal into its words."""
    text = u'this is a unicode string'
    expected_words = ['this', 'is', 'a', 'unicode', 'string']
    self.assertEqual(sanitized_list(text), expected_words)
def test_unicode(self):
    """sanitized_list must accept a ``u''`` string and return its five words."""
    actual = sanitized_list(u'this is a unicode string')
    self.assertEqual(
        actual,
        ['this', 'is', 'a', 'unicode', 'string'],
    )
def test_remove_nasties(self):
    """Exercise sanitized_list on input containing symbols and a shell command.

    With default arguments the result must be the ten lower-cased tokens
    listed below. With '[^a]' supplied as the second argument the result
    must be exactly three 'a' entries.
    """
    dirty = "This string! has 9 (&#$# all \n 'sudo rm -rf /*' nasty stuff"

    default_result = sanitized_list(dirty)
    self.assertEqual(
        default_result,
        ['this', 'string', 'has', '9', 'all',
         'sudo', 'rm', '-rf', 'nasty', 'stuff'],
    )

    # NOTE(review): presumably the second argument overrides the pattern of
    # characters to strip -- verify against sanitized_list.
    custom_result = sanitized_list(dirty, '[^a]')
    self.assertEqual(custom_result, ['a', 'a', 'a'])