def EpozTidy(self, html, pageurl):
    """Clean up editor HTML into XHTML using whichever tidy binding is present.

    Steps, in order:

    1. ``html`` is encoded to UTF-8; every ``EPOZ_SCRIPT`` match is replaced
       by ``<script `` and every ``</epoz:script>`` by ``</script>``.
    2. If ``uWordUnmunger`` is set, the text is passed through
       ``unmungeString``.
    3. If ``mxTidyIsAvailable``, the text is run through ``Tidy.tidy``
       (mxTidy); otherwise, if ``uTidyIsAvailable``, through
       ``tidy.parseString``.  With neither flag set, no tidying happens and
       the original ``html`` is carried forward unchanged.
    4. ``MSO_CLASS`` matches are rewritten to ``<\\1>`` and, if ``HTML_BODY``
       matches, only its first group is kept.
    5. If the context has an ``EpozPostTidy`` attribute, it is called as
       ``EpozPostTidy(self, output, pageurl)`` and its return value replaces
       the output.

    Whenever the tidy step reports errors, the output falls back to the
    original, unprocessed ``html`` argument (not the preprocessed text).

    Parameters:
        self    -- the calling context; also searched for ``EpozPostTidy``.
        html    -- the markup to clean; must support ``.encode("utf-8")``.
        pageurl -- passed through untouched to ``EpozPostTidy``.

    Returns:
        A tuple ``(errors, output, errordata)``: the error indicator from the
        tidy step (``0`` if no tidy ran), the resulting markup, and the
        error text (``""`` if no tidy ran).
    """
    errors = 0
    errordata = ""
    output = html

    # Preprocess on the UTF-8 encoded text: restore real <script> tags.
    source = EPOZ_SCRIPT.sub('<script ', html.encode("utf-8"))
    source = source.replace('</epoz:script>', '</script>')

    if uWordUnmunger:
        source = unmungeString(source)

    if mxTidyIsAvailable:
        # NOTE: an earlier configuration of this call used indent="auto" and
        # wrap=79, without logical_emphasis and alt_text.
        errors, _warnings, output, errordata = Tidy.tidy(
            source,
            drop_empty_paras=1,
            logical_emphasis=1,
            indent_spaces=1,
            indent="no",
            output_xhtml=1,
            word_2000=1,
            wrap=0,
            alt_text='',
            char_encoding="utf8",
        )
        if errors:
            # Tidy failed: hand back the caller's markup untouched.
            output = html
    elif uTidyIsAvailable:
        document = tidy.parseString(
            source,
            drop_empty_paras=1,
            indent_spaces=1,
            indent="auto",
            output_xhtml=1,
            word_2000=1,
            wrap=79,
            char_encoding="utf8",
            add_xml_decl=0,
            doctype="omit",
            indent_attributes=1,
            drop_proprietary_attributes=1,
            bare=1,
            clean=1,
            enclose_text=1,
            tidy_mark=0,
        )
        # Reports with severity 'W' are ignored; everything else is an error.
        problems = [
            str(report)
            for report in document.get_errors()
            if report.severity != 'W'
        ]
        errors = len(problems)
        errordata = '\n'.join(problems)
        output = html if errors else str(document)

    output = MSO_CLASS.sub(r"<\1>", output)
    body_match = HTML_BODY.search(output)
    if body_match:
        output = body_match.group(1)

    # Optional postprocessing hook (External Method / PythonScript).
    # It is called with three arguments:
    #   self    = the called context (= server)
    #   html    = the html body to postprocess
    #   pageurl = path of the edited object (maybe with template!)
    # and must return the new html body.
    post_tidy = getattr(self, 'EpozPostTidy', None)
    if post_tidy is None:
        return (errors, output, errordata)

    return (errors, post_tidy(self, output, pageurl), errordata)
</head> <body> ---ö---ü--- </body> </html> """ ### Testing string buffer interface... print 'Testing file parsing...', open('testWalter.xml', 'wb').write(data) (nerrors, nwarnings, outputdata, error) = Tidy.tidy(open('testWalter.xml', 'rb'), open('output.xml', 'wb'), numeric_entities=1, output_xhtml=1, char_encoding="latin1") print error, outputdata = open('output.xml', 'rb').read() if outputdata != verifydata: print '*** Output does NOT verify OK !' else: print 'OK' print ### Testing file interface... print 'Testing data parsing...', (nerrors, nwarnings, outputdata, error) = Tidy.tidy(data, numeric_entities=1,
<title>---ä---ä---</title> </head> <body> ---ö---ü--- </body> </html> """ ### Testing string buffer interface... print 'Testing file parsing...', open('testWalter.xml', 'wb').write(data) (nerrors, nwarnings, outputdata, error) = Tidy.tidy(open('testWalter.xml', 'rb'), open('output.xml', 'wb'), numeric_entities=1, output_xhtml=1, char_encoding="latin1") print error, outputdata = open('output.xml', 'rb').read() if outputdata != verifydata: print '*** Output does NOT verify OK !' else: print 'OK' print ### Testing file interface... print 'Testing data parsing...', (nerrors, nwarnings, outputdata, error) = Tidy.tidy(data, numeric_entities=1,