Ejemplo n.º 1
0
 def _get_text_values_for_schema_node(self, node, value):
     """Recursively collect the text values stored under a schema node.

     ``node`` and ``value`` are walked in parallel; how ``value`` is
     traversed depends on the colander type of ``node.typ``:

     * Mapping: each child node is looked up in ``value`` by its name.
     * Sequence: the first child node is applied to every item of ``value``.
     * Tuple: child nodes are paired positionally with the items of ``value``.
     * String: ``value`` itself is collected, unless the node sets
       ``include_in_text`` to a false value.  When the node sets ``is_html``
       to a true value, ``value`` is first passed through ``html_to_text``.

     Nodes of any other type contribute nothing.

     :param node: schema node exposing ``typ`` and ``children``.
     :param value: the value that corresponds to ``node``; a falsy value
         yields an empty list.
     :returns: flat list of the collected values, in traversal order.
     """
     result = []
     if not value:
         return result
     typ = node.typ
     # isinstance (rather than an exact type comparison) so that nodes whose
     # type is a subclass of a colander type are not silently skipped.
     if isinstance(typ, colander.Mapping):
         for cnode in node.children:
             # A missing key gives None, for which the recursive call
             # returns an empty list.
             result.extend(
                 self._get_text_values_for_schema_node(
                     cnode, value.get(cnode.name)))
     elif isinstance(typ, colander.Sequence):
         if node.children:
             cnode = node.children[0]
             for val in value:
                 result.extend(
                     self._get_text_values_for_schema_node(cnode, val))
     elif isinstance(typ, colander.Tuple):
         # zip stops at the shorter of the two, so a value with fewer items
         # than the schema has children no longer raises IndexError.
         for cnode, val in zip(node.children, value):
             result.extend(
                 self._get_text_values_for_schema_node(cnode, val))
     elif isinstance(typ, colander.String):
         if getattr(node, 'include_in_text', True):
             if getattr(node, 'is_html', False):
                 value = html_to_text(value, 0)
             # The conversion may leave nothing behind; skip empty text.
             if value:
                 result.append(value)
     # TODO: deform.FileData nodes (PDF, Word, etc.) are not handled yet.
     return result
Ejemplo n.º 2
0
 def test_htmlutil_html_to_text(self):
     """Check ``htmlutil.html_to_text`` against three fixed inputs.

     Covers a complete HTML document, a lone link converted with
     ``show_link_urls=True``, and an unknown entity converted with
     ``unknown_entity_replacement='?'``.
     """
     from audrey import htmlutil
     to_text = htmlutil.html_to_text

     # Complete document: heading, unknown tag, numeric and named entities,
     # line break, link and list.
     document = u'''<html><head><title>Title</title></head><body><h1>Header1</h1><p>Hello <unknown foo="bar">world!</unknown> Perhaps some other &#198;on...<br/><a href="http://python.org">Ooh, a link!</a><ul><li>animal</li><li>vegetable</li><li>mineral</li></ul></p><p>&copy; 2012</p></body></html>'''
     document_text = u'''Header1\n\n\n\nHello world! Perhaps some other \xc6on...\nOoh, a link!\n\n- animal\n\n- vegetable\n\n- mineral\n\n\xa9 2012'''
     self.assertEqual(to_text(document), document_text)

     # With show_link_urls=True the link target is appended in brackets.
     link = '''<a href="http://python.org">Ooh, a link!</a>'''
     link_text = '''Ooh, a link! [http://python.org]'''
     self.assertEqual(to_text(link, show_link_urls=True), link_text)

     # An unrecognised entity is swapped for the given replacement string.
     unknown_entity = '''Foo &meh; Bar'''
     self.assertEqual(
         to_text(unknown_entity, unknown_entity_replacement='?'),
         'Foo ? Bar')