Example #1
0
def EpozTidy(self, html, pageurl):
    """Clean up editor markup with an available tidy backend.

    The markup is UTF-8 encoded, every EPOZ_SCRIPT match is replaced by
    '<script ' and every '</epoz:script>' by '</script>', and the result
    is passed through unmungeString() when uWordUnmunger is set.  It is
    then handed to mxTidy (Tidy.tidy) if mxTidyIsAvailable, otherwise to
    uTidy (tidy.parseString) if uTidyIsAvailable.  When the backend reports
    errors, or when no backend is available, the untouched ``html``
    argument is used as the output instead.

    The output is run through MSO_CLASS.sub and, if HTML_BODY matches,
    reduced to that match's first group.  Finally, if ``self`` exposes an
    ``EpozPostTidy`` attribute, it is called as
    ``EpozPostTidy(self, output, pageurl)`` and its return value replaces
    the output.

    Returns a tuple ``(errors, output, errordata)``.
    """
    markup = html.encode("utf-8")
    markup = EPOZ_SCRIPT.sub('<script ', markup)
    markup = markup.replace('</epoz:script>', '</script>')
    if uWordUnmunger:
        markup = unmungeString(markup)

    # Defaults that stay in effect when neither tidy backend is available.
    errors, output, errordata = 0, html, ""

    if mxTidyIsAvailable:
        errors, _warnings, output, errordata = Tidy.tidy(
            markup,
            drop_empty_paras=1,
            logical_emphasis=1,
            indent_spaces=1,
            indent="no",
            output_xhtml=1,
            word_2000=1,
            wrap=0,
            alt_text='',
            char_encoding="utf8")
        if errors:
            # Tidy failed: fall back to the markup exactly as received.
            output = html
    elif uTidyIsAvailable:
        document = tidy.parseString(
            markup,
            drop_empty_paras=1,
            indent_spaces=1,
            indent="auto",
            output_xhtml=1,
            word_2000=1,
            wrap=79,
            char_encoding="utf8",
            add_xml_decl=0,
            doctype="omit",
            indent_attributes=1,
            drop_proprietary_attributes=1,
            bare=1,
            clean=1,
            enclose_text=1,
            tidy_mark=0)
        # Reports whose severity is 'W' are not counted as errors.
        problems = [str(report) for report in document.get_errors()
                    if report.severity != 'W']
        errors = len(problems)
        errordata = '\n'.join(problems)
        if not errors:
            output = str(document)
        else:
            output = html

    output = MSO_CLASS.sub(r"<\1>", output)
    body_match = HTML_BODY.search(output)
    if body_match:
        output = body_match.group(1)

    # Optional postprocessing hook (External Method / PythonScript).
    # It is called with three arguments:
    #   self    = the called context
    #   output  = the html body to postprocess
    #   pageurl = the pageurl value passed to EpozTidy
    # and must return the new html body.
    post_tidy = getattr(self, 'EpozPostTidy', None)
    if post_tidy is not None:
        output = post_tidy(self, output, pageurl)

    return errors, output, errordata
</head>
<body>
---ö---ü---
</body>
</html>

"""

### Testing file interface...

# Round-trip check through file objects: `data` is written to disk, tidied
# from one open file into another, and the written result is compared with
# `verifydata`.  Both names are defined outside this excerpt.
print 'Testing file parsing...',
# NOTE(review): none of the file objects opened here are closed explicitly;
# the script depends on them being closed as soon as they are released.
open('testWalter.xml', 'wb').write(data)
# The second positional argument is presumably the output stream: output.xml
# is opened for writing here and read back below.
(nerrors, nwarnings, outputdata,
 error) = Tidy.tidy(open('testWalter.xml', 'rb'),
                    open('output.xml', 'wb'),
                    numeric_entities=1,
                    output_xhtml=1,
                    char_encoding="latin1")
print error,
# The outputdata value returned by the call is discarded; the contents of
# output.xml are what get verified.
outputdata = open('output.xml', 'rb').read()
if outputdata != verifydata:
    print '*** Output does NOT verify OK !'
else:
    print 'OK'
print

### Testing string buffer interface...

print 'Testing data parsing...',
(nerrors, nwarnings, outputdata, error) = Tidy.tidy(data,
                                                    numeric_entities=1,
<title>---ä---ä---</title>
</head>
<body>
---ö---ü---
</body>
</html>

"""

### Testing file interface...

# Round-trip check through file objects: `data` is written to disk, tidied
# from one open file into another, and the written result is compared with
# `verifydata`.  Both names are defined outside this excerpt.
print 'Testing file parsing...',
# NOTE(review): none of the file objects opened here are closed explicitly;
# the script depends on them being closed as soon as they are released.
open('testWalter.xml', 'wb').write(data)
# The second positional argument is presumably the output stream: output.xml
# is opened for writing here and read back below.
(nerrors, nwarnings, outputdata, error) = Tidy.tidy(open('testWalter.xml', 'rb'), 
                                                    open('output.xml', 'wb'),
                                                    numeric_entities=1,
                                                    output_xhtml=1,
                                                    char_encoding="latin1")
print error,
# The outputdata value returned by the call is discarded; the contents of
# output.xml are what get verified.
outputdata = open('output.xml', 'rb').read()
if outputdata != verifydata:
    print '*** Output does NOT verify OK !'
else:
    print 'OK'
print

### Testing string buffer interface...

print 'Testing data parsing...',
(nerrors, nwarnings, outputdata, error) = Tidy.tidy(data, 
                                                    numeric_entities=1,