def _prepareForLoading(self, xhtmlString):
    u"""Prepares the raw xhtml string for loading into zDom.

    The string is processed in this order:
      1. leading whitespace is stripped;
      2. a leading <!DOCTYPE declaration is cut off, up to and including
         the first '>' character;
      3. a literal character replacement is applied (see note below);
      4. the string is passed through self._cleanupMsOffice();
      5. when xhtmlutil.hasBody() reports no body, plain text is first
         converted with ZTextToXhtmlTransformer and the result is then
         wrapped via xhtmlutil.wrapHtmlBody().

    Each conversion or wrapping step appends a message to self.messages.

    Returns the prepared string.
    """ #$NON-NLS-1$
    content = xhtmlString.lstrip()
    if content.startswith(u"<!DOCTYPE"): #$NON-NLS-1$
        # find() yields -1 when there is no '>', which makes the slice start
        # at 0 and leaves the string untouched.
        doctypeEnd = content.find(u">") #$NON-NLS-1$
        content = content[doctypeEnd + 1:]

    # NOTE(review): as rendered, both arguments look like a single blank;
    # presumably the first one is a non-breaking space (or was the nbsp
    # entity upstream) - confirm before touching this literal.
    content = content.replace(u' ', u' ') #$NON-NLS-1$ #$NON-NLS-2$

    # Only the second element of the returned pair is used here.
    cleanupResult = self._cleanupMsOffice(content)
    (_bOk, content) = cleanupResult #@UnusedVariable

    # Content that already has a <body/> needs no further work.
    if xhtmlutil.hasBody(content):
        return content

    if not xhtmlutil.hasXhtmlMarkup(content):
        # No markup at all: convert the plain text to xhtml first.
        self.messages.append(u"Converting plain text to xhtml markup.") #$NON-NLS-1$
        content = ZTextToXhtmlTransformer().transform(content)

    content = xhtmlutil.wrapHtmlBody(content)
    self.messages.append(u"Adding <html><body></body></html> wrapper.") #$NON-NLS-1$
    return content
def _internalRunTidy(htmlSrc, options=XHTML_OPTIONS):
    u"""Runs tidy over htmlSrc and returns the tuple (html, errorList).

    htmlSrc -- the markup to tidy. When it is non-empty, ILLEGAL_ENTITY_RE
               is applied to it and, if hasBody() reports no body, it is
               wrapped in XHTML_TEMPLATE before being handed to tidy.
    options -- mapping of tidy options passed to tidy.parseString() as
               keyword arguments. The mapping itself is never modified;
               None or an empty mapping means "no options".

    Returns (html, errorList): html is the tidy result converted with
    convertToUnicode(); errorList holds one ZTidyError per entry reported
    by get_errors(), with line numbers reduced by the template line offset
    when the source had to be wrapped.
    """ #$NON-NLS-1$
    unsupportedOptions = ("raw", "output_error", "show_warnings")

    # Work on a private copy: the default value is the shared module-level
    # XHTML_OPTIONS, and editing it (or a caller's dict) in place would leak
    # these changes into every later use of that mapping.
    tidyOptions = {}
    if options:
        tidyOptions = dict(options)
        tidyOptions['tidy_mark'] = 0
        # remove unsupported options.
        for name in unsupportedOptions:
            tidyOptions.pop(name, None)

    lineOffset = 0
    if htmlSrc:
        # escape illegal entities.
        # NOTE(review): the original example comment read "convert &##! to
        # &##!" (identical before and after); the replacement presumably was
        # meant to emit an escaped ampersand - confirm against
        # ILLEGAL_ENTITY_RE before changing the literal below.
        try:
            htmlSrc = ILLEGAL_ENTITY_RE.sub(u"&\\g<2>", htmlSrc) #$NON-NLS-1$
        except Exception:
            # Best effort only: if the substitution fails, tidy still gets
            # the unescaped source.
            pass
        if not hasBody(htmlSrc):
            # wrap content inside a <html><head/><body> [CONTENT] </body></html>
            htmlSrc = XHTML_TEMPLATE % htmlSrc
            # Reported line numbers must be shifted back by the template lines.
            lineOffset = XHTML_TEMPLATE_LINE_OFFSET

    tidySrc = convertToUtf8(htmlSrc)
    tidyRet = tidy.parseString(tidySrc, **tidyOptions)

    severities = dict(W=ZTidyError.WARN, E=ZTidyError.ERROR, C=ZTidyError.OTHER)
    errList = []
    for err in tidyRet.get_errors():
        te = ZTidyError()
        if err.line is not None:
            te.line = err.line - lineOffset
        if err.col is not None:
            te.col = err.col
        if err.message is not None:
            te.message = err.message
        # Severity codes without a mapping fall back to ZTidyError.NONE.
        te.severity = severities.get(err.severity, ZTidyError.NONE)
        errList.append(te)

    outHtml = str(tidyRet)
    return (convertToUnicode(outHtml), errList)
def _internalRunTidy(htmlSrc, options=XHTML_OPTIONS):
    u"""Runs tidy over htmlSrc and returns the tuple (html, errorList).

    htmlSrc -- the markup to tidy. When it is non-empty, ILLEGAL_ENTITY_RE
               is applied to it and, if hasBody() reports no body, it is
               wrapped in XHTML_TEMPLATE before being handed to tidy.
    options -- mapping of tidy options passed to tidy.parseString() as
               keyword arguments. The mapping itself is never modified;
               None or an empty mapping means "no options".

    Returns (html, errorList): html is the tidy result converted with
    convertToUnicode(); errorList holds one ZTidyError per entry reported
    by get_errors(), with line numbers reduced by the template line offset
    when the source had to be wrapped.
    """ #$NON-NLS-1$
    unsupportedOptions = ("raw", "output_error", "show_warnings")

    # Work on a private copy: the default value is the shared module-level
    # XHTML_OPTIONS, and editing it (or a caller's dict) in place would leak
    # these changes into every later use of that mapping.
    tidyOptions = {}
    if options:
        tidyOptions = dict(options)
        tidyOptions['tidy_mark'] = 0
        # remove unsupported options.
        for name in unsupportedOptions:
            tidyOptions.pop(name, None)

    lineOffset = 0
    if htmlSrc:
        # escape illegal entities.
        # NOTE(review): the original example comment read "convert &##! to
        # &##!" (identical before and after); the replacement presumably was
        # meant to emit an escaped ampersand - confirm against
        # ILLEGAL_ENTITY_RE before changing the literal below.
        try:
            htmlSrc = ILLEGAL_ENTITY_RE.sub(u"&\\g<2>", htmlSrc) #$NON-NLS-1$
        except Exception:
            # Best effort only: if the substitution fails, tidy still gets
            # the unescaped source.
            pass
        if not hasBody(htmlSrc):
            # wrap content inside a <html><head/><body> [CONTENT] </body></html>
            htmlSrc = XHTML_TEMPLATE % htmlSrc
            # Reported line numbers must be shifted back by the template lines.
            lineOffset = XHTML_TEMPLATE_LINE_OFFSET

    tidySrc = convertToUtf8(htmlSrc)
    tidyRet = tidy.parseString(tidySrc, **tidyOptions)

    severities = dict(W=ZTidyError.WARN, E=ZTidyError.ERROR, C=ZTidyError.OTHER)
    errList = []
    for err in tidyRet.get_errors():
        te = ZTidyError()
        if err.line is not None:
            te.line = err.line - lineOffset
        if err.col is not None:
            te.col = err.col
        if err.message is not None:
            te.message = err.message
        # Severity codes without a mapping fall back to ZTidyError.NONE.
        te.severity = severities.get(err.severity, ZTidyError.NONE)
        errList.append(te)

    outHtml = str(tidyRet)
    return (convertToUnicode(outHtml), errList)
def _prepareForLoading(self, xhtmlString):
    u"""Prepares the raw xhtml string for loading into zDom.

    Strips leading whitespace and a leading <!DOCTYPE declaration, applies
    a literal character replacement, runs self._cleanupMsOffice() and, when
    xhtmlutil.hasBody() reports no body, wraps the content with
    xhtmlutil.wrapHtmlBody() (converting plain text to xhtml beforehand if
    xhtmlutil.hasXhtmlMarkup() finds no markup). Conversion and wrapping
    are each recorded in self.messages.

    Returns the prepared string.
    """ #$NON-NLS-1$
    markup = xhtmlString.lstrip()

    hasDoctype = markup.startswith(u"<!DOCTYPE") #$NON-NLS-1$
    if hasDoctype:
        # Drop everything up to and including the first '>'; when no '>'
        # exists find() gives -1 and the slice keeps the whole string.
        markup = markup[markup.find(u">") + 1:] #$NON-NLS-1$

    # NOTE(review): as rendered, both arguments look like a single blank;
    # presumably the first one is a non-breaking space (or was the nbsp
    # entity upstream) - confirm before touching this literal.
    markup = markup.replace(u' ', u' ') #$NON-NLS-1$ #$NON-NLS-2$

    # The first element of the returned pair is not used by this method.
    (_unusedOk, markup) = self._cleanupMsOffice(markup) #@UnusedVariable

    # if the string content does not have a <body/> then convert to xhtml
    # and wrap it with <html><body/></html>
    needsWrapper = not xhtmlutil.hasBody(markup)
    if needsWrapper and not xhtmlutil.hasXhtmlMarkup(markup):
        self.messages.append(u"Converting plain text to xhtml markup.") #$NON-NLS-1$
        textTransformer = ZTextToXhtmlTransformer()
        markup = textTransformer.transform(markup)
    if needsWrapper:
        markup = xhtmlutil.wrapHtmlBody(markup)
        self.messages.append(u"Adding <html><body></body></html> wrapper.") #$NON-NLS-1$

    return markup