def call_macro(wcontext, name, elements):
    """
    Invoke the macro called `name` with the already-parsed argument
    elements and return its normalized result.

    Resolution order:
      1. wcontext.macro_handler, if set (lets the caller override every
         built-in macro). It returns a (handled, result) pair.
      2. A callable of that name in wcontext.mod_embedded (skipped in
         restricted mode).
      3. plugins.unsafe (skipped in restricted mode).
      4. plugins.safe.

    The macro's return value is passed through process_macro_result(),
    whose value is returned.

    Raises WikError if the macro cannot be found (or may not be called),
    or if the handler/macro raises anything while running.
    """
    def call_failed():
        # Must be called from inside an 'except' block: exception_to_text()
        # is invoked here to describe the failure being handled.
        return WikError(
            "ERROR CALLING MACRO <<%s>> with args:\n%s\n\n" % (name, xmltrace(elements)),
            '', exception_to_text(), '')

    plugins = load_plugins(wcontext.plugin_dirs)
    handled = False

    # give the caller-supplied handler first shot at the macro
    if wcontext.macro_handler is not None:
        try:
            handled, result = wcontext.macro_handler(name, wcontext, *elements)
        except:
            raise call_failed()

    # The lookups below run whether or not the handler dealt with the
    # macro; 'handled' is only consulted afterwards.
    func = None
    if not wcontext.restricted_mode:
        # functions defined in embedded wikitext; non-callables are ignored
        candidate = getattr(wcontext.mod_embedded, name, None)
        if callable(candidate):
            func = candidate

        # next, unsafe macros
        if func is None:
            func = plugins.unsafe.get(name, None)

    # finally, safe macros
    if func is None:
        func = plugins.safe.get(name, None)

    if handled:
        return process_macro_result(wcontext, name, result)

    if func is None:
        raise WikError("Macro '%s' not defined, or not allowed to call." % name)

    # call with positional args
    try:
        result = func(wcontext, *elements)
    except:
        raise call_failed()

    return process_macro_result(wcontext, name, result)
def codebox(context, title, codetext):
    """
    |!Usage|{{{<<codebox title arg [arg ...]>>}}}|
    |!Description|Like a regular code box, except with a title.|
    |!Example|{{{<<codebox "A Title" '''
    for i in range(10):
        print i'''>>}}}|
    |!Result|<<codebox "A Title" '''
    for i in range(10):
        print i'''>>|
    """
    # NOTE(review): the docstring above is wiki markup, not prose; it is
    # presumably rendered as the macro's help text, so it is kept verbatim.
    #
    # Builds:
    #   <div wikkly-codebox-container>
    #       <div wikkly-codebox-title> evaluated title nodes </div>
    #       <div wikkly-codebox-body> TextCode(codetext) </div>
    #   </div>
    # and returns it as a one-element list.
    # Raises WikError unless 'codetext' is a single TextMacroArg.
    if codetext.tag != 'TextMacroArg':
        raise WikError("'codetext' must be a single text argument - got %s" % codetext.tag)

    # title: evaluate the argument as wiki text and collect the nodes
    title_div = DIV('wikkly-codebox-title')
    for title_node in eval_wiki_macro_args(context, [title]):
        title_div.append(title_node)

    # body: a TextCode node so the text gets code-style escaping
    body_div = DIV('wikkly-codebox-body')
    body_div.append(Text(codetext.text, 'TextCode'))

    outer = DIV('wikkly-codebox-container')
    outer.append(title_div)
    outer.append(body_div)
    return [outer]
def insert_pycode(wcontext, txt):
    """
    Execute embedded Python source inside the wcontext.mod_embedded namespace.

    Any functions or modules the code defines/imports become attributes of
    wcontext.mod_embedded. After the code has run, a set of default names
    (Element, SubElement, Text, WikError, FS_CWD and everything in
    plugins.embed) is written into the same namespace, overwriting any
    same-named definitions.

    txt is executed as-is: callers are responsible for only passing code
    they are allowed to run (nothing here checks restricted mode).

    Raises WikError if executing the code raises anything; the first 80
    characters of the source are attached to the error.
    """
    try:
        # exec(code, globals) is accepted by both Python 2 (tuple form of
        # the exec statement) and Python 3. The previous "exec(txt) in ns"
        # spelling only targets 'ns' on Python 2.
        exec(txt, wcontext.mod_embedded.__dict__)
    except:
        # NOTE(review): deliberately left as a bare except so behavior is
        # unchanged -- embedded user code may raise anything.
        raise WikError("Error compiling <?py ... ?> macro",
                       txt[:80], exception_to_text(), '')

    # add some default globals
    eglobals = {
        'Element': Element,
        'SubElement': SubElement,
        'Text': Text,
        'WikError': WikError,
        'FS_CWD': wcontext.var_get_text('$FS_CWD'),
    }

    # add the names plugins export for embedding as well
    plugins = load_plugins(wcontext.plugin_dirs)
    eglobals.update(plugins.embed)

    wcontext.mod_embedded.__dict__.update(eglobals)
def beginHighlight(self, style=None):
    """
    Open a new 'Highlight' node, optionally recording a CSS style on it.

    `style` is the raw opening markup. Supported forms:
      1. None                                - standard highlight (no style set)
      2. "@@prop1: val1; prop2: val2; ..."   - the property list is used as-is
      3. "@@color(COLOR): ..."               - becomes "color: COLOR;"
      4. "@@bgcolor(COLOR): ..."             - becomes "background: COLOR;"

    The forms are tried in the order 2, 3, 4. The resulting text is stored
    in the 'style' attribute of the current node (self.curnode) after
    self.pushnew('Highlight') has been called.

    Raises WikError if `style` is given but matches none of the forms.
    """
    self.pushnew('Highlight')

    if style is None:
        return

    # Patterns are raw strings so '\s' and '\(' reach the regex engine
    # untouched (and do not trigger invalid-escape warnings).
    # case #2 (keep in sync with wikklytext.lexer:t_HIGHLIGHT_CSS)
    m2 = re.match(r'@@((\s*[a-zA-Z][a-zA-Z0-9-]*\s*:.+?;)+)', style)
    # case #3
    m3 = re.match(r'@@color\((.+?)\):', style)
    # case #4
    m4 = re.match(r'@@bgcolor\((.+?)\):', style)

    if m2:
        css_text = m2.group(1)
    elif m3:
        css_text = "color: %s;" % m3.group(1)
    elif m4:
        css_text = "background: %s;" % m4.group(1)
    else:
        raise WikError("Unknown style: %s" % repr(style))

    self.curnode.set('style', css_text)
def endNoWiki(self):
    """
    Close a <NoWiki> section and replace the wrapper with its single
    <TextNoWiki> child (same treatment as RawHTML).

    Raises WikError if, after self.popto('NoWiki'), the current node does
    not hold exactly one child tagged 'TextNoWiki'.
    """
    self.popto('NoWiki')

    wrapper = self.curnode
    # sanity: exactly one TextNoWiki child expected
    if len(wrapper) != 1 or wrapper[0].tag != 'TextNoWiki':
        raise WikError("Internal error - bad nodes under <NoWiki>")

    payload = wrapper[0]

    # step out of the wrapper, then swap it for its text child
    self.popnode('NoWiki')
    parent = self.curnode
    parent.remove(wrapper)
    parent.append(payload)
def process_macro_result(wcontext, name, result):
    """
    Normalize whatever a macro returned into a single ElementList.

    Accepted return values:
      * an Element tagged 'ElementList' - its children are processed
      * a single Element, unicode or str - treated as a one-item list
      * a list or tuple of the above

    Elements are appended untouched. unicode items are parsed as wiki text
    with eval_wiki_text() and the resulting elements appended. str items
    are first converted with unicode() (a convenience; macros should
    really return unicode).

    `name` is only used in error messages.

    Raises WikError if the result, or any item in it, is of another type.
    """
    from wikklytext.eval import eval_wiki_text

    is_elem = iselement(result)
    if is_elem and result.tag == 'ElementList':
        items = result  # already list-like
    elif is_elem or isinstance(result, (unicode, str)):
        items = [result]  # wrap single value
    elif isinstance(result, (list, tuple)):
        items = result
    else:
        raise WikError("Calling <<%s>>\nMacros must return Elements or Unicode\nGot: %s, '%s'" % \
                       (name, type(result), repr(result)))

    outnode = ElementList()
    for item in items:
        if iselement(item):
            outnode.append(item)
            continue

        if isinstance(item, unicode):
            wikitext = item
        elif isinstance(item, str):
            # implicit conversion, done for convenience only
            wikitext = unicode(item)
        else:
            raise WikError("Calling <<%s>>\nMacros must return Elements or Unicode\nGot: %s, '%s'" % \
                           (name, type(item), repr(item)))

        # parse text -> Elements
        for parsed in eval_wiki_text(wcontext, wikitext):
            outnode.append(parsed)

    return outnode
def endRawHTML(self):
    """
    Close a <RawHTML> section, promoting its <TextHTML> child.

    Expected structure: <RawHTML><TextHTML> ... </TextHTML></RawHTML>.
    The <RawHTML> wrapper only existed so the right kind of text node was
    created; it is removed here and the <TextHTML> node is appended to the
    parent in its place.

    Raises WikError if, after self.popto('RawHTML'), the current node does
    not hold exactly one child tagged 'TextHTML'.
    """
    self.popto('RawHTML')

    html_wrapper = self.curnode
    children_ok = (len(html_wrapper) == 1 and html_wrapper[0].tag == 'TextHTML')
    if not children_ok:
        raise WikError("Internal error - bad nodes under <RawHTML>")

    text_node = html_wrapper[0]

    # leave the wrapper, drop it from its parent and keep only the text
    self.popnode('RawHTML')
    self.curnode.remove(html_wrapper)
    self.curnode.append(text_node)
def make_html(node, wcontext, parent_map, prevsib_map, add_classes):
    """
    Render `node` (and, recursively, its subnodes) to an HTML string.

    node         -- element to render; dispatch is on node.tag
    wcontext     -- rendering context; this function reads .stoptime,
                    .restricted_mode and .var_get_int('$LINKS_NEW_WINDOW')
                    and passes it to resolve_URL()
    parent_map   -- maps a node to its parent (used for TableCell errors)
    prevsib_map  -- maps a node to its previous sibling (used by BlankLines)
    add_classes  -- list of extra CSS classes handed to strclass(); a
                    CSSBlock pushes its class while rendering its children
                    and pops it afterwards

    Returns the HTML text. If wcontext.stoptime has passed, a short
    "Max runtime exceeded" marker is returned instead.

    Raises WikError for an unknown tag or a TableCell lacking 'type', and
    Exception for a link type other than external/internal/newitem.
    """
    from time import time

    # check time limit at each node
    if time() > wcontext.stoptime:
        return 'Max runtime exceeded!!<p>'

    css = 'wikkly'

    # tags rendered as <tag class="...">subnodes</tag>
    simpletags = {
        'Bold': ('b', None),
        'Italic': ('i', None),
        'Strikethrough': ('span', '%s-strike' % css),
        'Underline': ('span', '%s-u' % css),
        'Superscript': ('sup', None),
        'Subscript': ('sub', None),
        'NumberedList': ('ol', '%s-ol' % css),
        'UnnumberedList': ('ul', '%s-ul' % css),
        'BlockIndent': ('div', '%s-block-indent' % css),
        'LineIndent': ('div', '%s-line-indent' % css),
        'CodeBlock': ('div', '%s-code-block' % css),
        'CodeInline': ('span', '%s-code-inline' % css),
        'Table': ('table', '%s-table' % css),
        'TableCaption': ('caption', '%s-caption' % css),
        'TableRow': ('tr', '%s-tr' % css),
        'DefinitionList': ('dl', '%s-dl' % css),
        'DefinitionTerm': ('dt', '%s-dt' % css),
        'DefinitionDef': ('dd', '%s-dd' % css),
    }

    html = ''

    if node.tag in simpletags:
        tag, cssclass = simpletags[node.tag]
        html += '<%s class="%s">' % (tag, strclass(cssclass, add_classes))
        close = '</%s>' % tag

    elif node.tag in ('NumberedListItem', 'UnnumberedListItem'):
        if node.tag == 'NumberedListItem':
            listkind = 'ol'
        else:
            listkind = 'ul'
        c = "%s-%s-li%d" % (css, listkind, int(node.get('level')))
        html += '<li class="%s">' % strclass(c, add_classes)
        close = '</li>'

    elif node.tag == 'Heading':
        level = int(node.get('level'))
        c = "%s-h%d" % (css, level)
        html += '<h%d class="%s">' % (level, strclass(c, add_classes))
        close = '</h%d>' % level

    # these exist only for structure in the XML stream; no HTML of their own
    elif node.tag in ('ErrorsList', 'WikklyContent', 'Content',
                      'ElementList', 'DefinitionEntry'):
        close = ''

    # add_classes is not applied to the Error* tags since these are
    # internally generated nodes
    elif node.tag == 'Error':
        html += '<div class="%s-error-container">' % css
        close = '</div>'

    elif node.tag == 'ErrorMessage':
        html += '<div class="%s-error-head">Error Message</div>' % css
        html += '<div class="wikkly-error-body">'
        close = '</div>'

    elif node.tag == 'ErrorLookingAt':
        html += '<div class="wikkly-error-head">Looking at:</div>'
        html += '<div class="wikkly-error-body">'
        close = '</div>'

    elif node.tag == 'ErrorTrace':
        html += '<div class="wikkly-error-head">Traceback:</div>'
        html += '<div class="wikkly-error-body">'
        close = '</div>'

    elif node.tag in ['Text', 'TextNoWiki']:
        # These are not allowed to have inner tags. Enforce this by
        # escaping & returning text immediately, not checking for subnodes.
        return escapeText(node.text or '')

    elif node.tag == 'TextCode':
        return escapeTextCode(node.text or '')

    elif node.tag == 'TextHTML':
        if wcontext.restricted_mode:
            return ''  # <html> not allowed in Safe mode

        # no escaping, leave as raw HTML
        return node.text or ''

    elif node.tag == 'BlankLines':
        # no inner tags allowed, can return immediately
        #
        # NOTE: HTML headers seem to include some extra builtin padding, so
        # directly after a <Heading> one fewer <br/> is emitted.
        count = int(node.get('count'))
        if prevsib_map[node].tag == 'Heading':
            return '<br/>' * count
        else:
            return '<br/>' * (count + 1)

    elif node.tag == 'Highlight':
        if node_contains_block_elements(node):
            tagopen = 'div'
            close = '</div>'
        else:
            tagopen = 'span'
            close = '</span>'

        # use style if given, else use default highlight style
        if 'style' in node.keys():
            if wcontext.restricted_mode:
                # remove inline styling in safe mode (to remove XSS issues)
                html += '<%s>' % tagopen
            else:
                html += '<%s style="%s">' % (tagopen, node.get('style'))
        else:
            c = "%s-highlight" % css
            html += '<%s class="%s">' % (tagopen, strclass(c, add_classes))

    elif node.tag == 'TableCell':
        if int(node.get('skip', '0')):
            return ''

        if 'type' not in node.keys():
            raise WikError("Bad TableCell", xmltrace(parent_map[node]))

        if node.get('type') == 'data':
            tag = 'td'
            close = '</td>'
        else:
            tag = 'th'
            close = '</th>'

        style = 'text-align: %s;' % sanitize_text_align(node.get('text-align'))
        # background color is only honored outside of safe mode
        if 'bgcolor' in node.keys() and not wcontext.restricted_mode:
            style += 'background: %s;' % node.get('bgcolor')

        c = "%s-%s" % (css, tag)
        html += '<%s class="%s" colspan="%d" rowspan="%d" style="%s">' % \
            (tag, strclass(c, add_classes),
             int(node.get('colspan', '1')),
             int(node.get('rowspan', '1')), style)

    elif node.tag == 'Link':
        # write entire block here and return
        source = node.find('LinkSource').find('Text').text
        dest = node.find('LinkDest').find('Text').text

        url, linktype = resolve_URL(wcontext, dest)

        if wcontext.restricted_mode:
            # if anything potentially unsafe in URL, don't make link.
            if unsafe_url(url):
                return '<span class="wikkly-highlight">Unsafe URL removed</span>'

        parts = urlparse(url)
        if len(parts[0]) and len(parts[1]):
            helptext = 'Link to %s://%s' % (parts[0], parts[1])
        else:
            helptext = url

        # style URL as appropriate
        if linktype == 'external':
            if wcontext.var_get_int('$LINKS_NEW_WINDOW'):
                onclick = 'onclick="window.open(this.href);return false;"'
                linkclass = "%s-a-www" % css  # style as external link
            else:
                onclick = ''
                linkclass = "%s-a-internal" % css  # style as internal link
        elif linktype == 'internal':
            onclick = ''
            linkclass = "%s-a-internal" % css  # style as internal link
        elif linktype == 'newitem':
            onclick = ''
            linkclass = "%s-a-unknown-item" % css  # style as unknown link
        else:
            # report the value that was actually tested
            raise Exception("INTERNAL ERROR - unknown linktype '%s'" % linktype)

        linkclass = 'class="%s"' % strclass(linkclass, add_classes)

        # NOTE(review): 'helptext' goes into the title attribute without
        # attribute escaping -- confirm resolve_URL()/unsafe_url() make
        # that safe.
        html += '<a title="%s" %s href="%s" %s>%s</a>' % \
            (helptext, linkclass,
             # use quoting to take care of " and ' as well
             escquotes(escapeURL(url)),
             onclick,
             escapeText(source))

        return html

    elif node.tag == 'Image':
        title = node.find('ImageTitle')
        url = node.find('ImageLink')
        filename = node.find('ImageFilename').find('Text').text

        if url is not None:
            url, linktype = resolve_URL(wcontext, url.find('Text').text)

            if wcontext.restricted_mode:
                # if anything potentially unsafe in URL, don't make link.
                if unsafe_url(url):
                    # don't apply add_classes
                    return '<span class="wikkly-highlight">Unsafe image source removed</span>'

        if wcontext.restricted_mode:
            # same check for the image source itself
            if unsafe_url(filename):
                # don't apply add_classes
                return '<span class="wikkly-highlight">Unsafe URL removed</span>'

        if title is not None:
            title = title.find('Text').text

        if url is not None:
            html += '<a href="%s" ' % escquotes(escapeURL(url))
            if wcontext.var_get_int('$LINKS_NEW_WINDOW'):
                html += 'onclick="window.open(this.href);return false;" '

            if title is not None:
                html += 'title=%s ' % escapeAttr(title)

            html += '>'

        c = "%s-img" % css
        html += '<img class="%s" src=%s alt=%s />' % \
            (strclass(c, add_classes), escapeAttr(filename), escapeAttr(filename))

        if url is not None:
            html += '</a>'

        return html

    elif node.tag == 'CreateAnchor':
        name = node.find('Text')
        c = "%s-a-internal" % css  # style as internal link
        # NOTE(review): name.text is inserted into the attribute unescaped;
        # confirm anchor names are validated before they reach this point.
        html += '<a name="%s" class="%s"> </a>' % (name.text, strclass(c, add_classes))
        return html

    elif node.tag in ('MacroBlock', 'MacroInline'):
        # Block-level (<div>) or inline (<span>) element created by a macro.
        # NOTE: The macro that created the element is responsible for not
        # letting unsafe user-data be injected into 'style'.
        # 'class' and 'id' are sanity checked.
        if node.tag == 'MacroBlock':
            tag = 'div'
        else:
            tag = 'span'

        html += '<%s ' % tag
        if 'class' in node.keys():
            # NOTE - do NOT add 'css' prefix - assume macro has given full classname
            html += 'class="%s" ' % strclass(node.get('class'), add_classes)

        if 'style' in node.keys():
            html += 'style="%s" ' % node.get('style')

        if 'id' in node.keys():
            html += 'id="%s" ' % sanitize_id(node.get('id'))

        html += '>'
        close = '</%s>' % tag

    elif node.tag == 'CSSBlock':
        c = "%s-%s" % (css, sanitize_classname(node.get('class')))
        if node_contains_block_elements(node):
            # CSS seems to behave better if these are in a DIV vs. SPAN
            html += '<div class="%s">' % strclass(c, add_classes)
            close = '</div>'
        else:
            html += '<span class="%s">' % strclass(c, add_classes)
            close = '</span>'

        # append class to inner nodes as well (handle here and return, for
        # simplicity). add_classes belongs to the caller, so make sure the
        # pushed class is removed even if a subnode fails to render.
        add_classes.append('%s-%s' % (css, node.get('class')))
        try:
            for subnode in node:
                html += make_html(subnode, wcontext, parent_map, prevsib_map, add_classes)
        finally:
            add_classes.pop()

        html += close
        return html

    elif node.tag == 'Separator':
        c = "%s-separator" % css
        html += '<hr class="%s"/>' % strclass(c, add_classes)
        close = ''

    elif node.tag == 'LineBreak':
        html += '<br/>'
        close = ''

    elif node.tag == 'DashChar':
        html += ' — '
        close = ''

    else:
        raise WikError("Bad tag %s" % node.tag)

    # if subnodes not handled already, handle them now
    for subnode in node:
        html += make_html(subnode, wcontext, parent_map, prevsib_map, add_classes)

    html += close

    return html
def parse(self, txt, wcontext):
    """
    Tokenize `txt` and feed the token stream to wcontext.parser as a series
    of begin/end/characters calls.

    txt      -- wiki text; unicode, or a str that unicode() can convert
    wcontext -- context object; this method reads .parser, .stoptime and
                .restricted_mode and stores it as self.wcontext

    The lexer is built with lex.lex(object=self, ...) and stored in
    self.lexer. Nesting state that other methods may need is kept on self
    (in_table, in_tablerow, in_tablecell, in_strip_ccomment); everything
    else is local.

    Parsing stops at end of input, or as soon as time() passes
    wcontext.stoptime. In both cases open lists, tables, line-indents and
    definition lists are closed, unterminated inline markup is reported via
    parser.error(), and parser.endDoc() is called.

    Raises WikError if txt is not (convertible to) unicode, or on an
    internal inconsistency in list handling.
    """
    from time import time

    # txt should really be unicode, but accept str if it is plain ASCII
    if isinstance(txt, str):
        txt = unicode(txt)

    # sanity
    if not isinstance(txt, unicode):
        raise WikError("Unicode value required in parse() - got '%s'" % repr(txt))

    # flags:
    #   * re.M so beginning-of-line matches work as expected
    #   * re.I for case-insensitive matching
    #   * re.S so '.' matches newline also
    self.lexer = lex.lex(object=self, reflags=re.M | re.I | re.S)

    self.wcontext = wcontext
    parser = self.wcontext.parser  # shortcut for below

    # state vars - most of these are local context only, but some are set
    # into self if they are needed elsewhere
    in_bold = 0
    in_italic = 0
    in_strikethrough = 0
    in_underline = 0
    in_superscript = 0
    in_subscript = 0
    in_highlight = 0
    in_block_indent = 0
    in_line_indent = 0  # if > 0 this is the nesting level
    # the top of stack is the _currently_ opened listitem + level
    # e.g. for <ul>, item "###" is ('U',3), for <ol>, item '##' is ('N',2)
    list_stack = [('X', 0)]  # no currently opened list
    in_heading = 0
    in_deflist = 0
    # DL/DT/DD are not nested, so simple flags are enough
    in_defterm = 0  # in <DT>?
    in_defdef = 0  # in <DD>?
    self.in_strip_ccomment = 0  # inside /*** ... ***/ block
    in_html_comment = 0  # inside <!--- ... ---> block
    # since CSS blocks can nest, this is a list of currently open blocks, by CSS name
    css_stack = []
    self.in_table = 0
    self.in_tablerow = 0
    self.in_tablecell = 0

    last_token = (None, None)  # type,value

    # parser callbacks per list kind: (begin list, begin item, end item)
    list_ops = {
        'N': (parser.beginNList, parser.beginNListItem, parser.endNListItem),
        'U': (parser.beginUList, parser.beginUListItem, parser.endUListItem),
    }

    def close_top_list():
        # Close the item and list on top of list_stack and pop the entry.
        kind, _level = list_stack.pop()
        if kind == 'N':
            parser.endNListItem()
            parser.endNList()
        else:
            parser.endUListItem()
            parser.endUList()

    def close_all_lists():
        # Close every open list; the ('X', 0) sentinel stops the loop.
        while list_stack[-1][0] in "NU":
            close_top_list()

    def open_list_item(kind, value):
        # Start a list item of `kind` ('N' or 'U') at nesting level
        # len(value), closing/opening lists as needed.
        #
        # Top of stack is the CURRENTLY opened listitem (the one before
        # this one). Cases:
        #   1. TOS has the same kind AND level:
        #        close current item, start a new one (stack unchanged)
        #   2. TOS is a LOWER level, any kind:
        #        this is a sublist of the current item - open a new list
        #        and push it
        #   3. TOS is a HIGHER level, any kind:
        #        close lists until TOS is the same kind AND level (or the
        #        stack is empty), then start a new item, or a new list
        #        (pushed) if nothing matched
        #   4. TOS has the same level but the other kind:
        #        close current list, pop, start a new list (pushed)
        begin_list, begin_item, end_item = list_ops[kind]
        level = len(value)
        top_kind, top_level = list_stack[-1]

        if top_kind == kind and top_level == level:
            # case 1
            end_item()
            begin_item(value)

        elif top_level < level:
            # case 2
            begin_list()
            begin_item(value)
            list_stack.append((kind, level))

        elif top_level > level:
            # case 3
            while list_stack[-1] != (kind, level) and list_stack[-1][0] in 'NU':
                close_top_list()

            if list_stack[-1][0] != kind:
                # stack emptied without finding a matching list: start a
                # new one and track it, so later items and the close-all
                # logic see it
                begin_list()
                list_stack.append((kind, level))
            else:
                # TOS is already this list; just close its current item
                end_item()

            begin_item(value)

        elif top_kind in ('N', 'U'):
            # case 4 (same level; kind must differ or case 1 had matched)
            close_top_list()
            begin_list()
            begin_item(value)
            list_stack.append((kind, level))

        else:
            # cannot reach ... if the logic above is correct
            raise WikError("** INTERNAL ERROR in %s_LISTITEM **" % kind)

    self.prepare_input(txt)

    parser.beginDoc()

    while 1:
        tok = self.lexer.token()

        # check for EOF or over time limit
        if tok is None or time() > wcontext.stoptime:
            if tok is not None:
                parser.characters('ERROR: TIME LIMIT EXCEEDED!')
                parser.linebreak()

            # close any open lists
            close_all_lists()

            # close any open tables
            if self.in_tablecell:
                parser.endTableCell()
            if self.in_tablerow:
                parser.endTableRow()
            if self.in_table:
                parser.endTable()

            # close any opened line-indents
            while in_line_indent:
                parser.endLineIndent()
                in_line_indent -= 1

            # close any open definition list
            if in_defterm:
                parser.endDefinitionTerm()
            if in_defdef:
                parser.endDefinitionDef()
            if in_deflist:
                parser.endDefinitionList()

            # watch out for ending inside of a structured item
            for v, s in [
                    (in_bold, "'' ... ''"),
                    (in_italic, "// ... //"),
                    (in_strikethrough, "-- ... --"),
                    (in_underline, "__ .. .__"),
                    (in_superscript, "^^ ... ^^"),
                    (in_subscript, "~~ ... ~~"),
                    (in_highlight, "@@ ... @@"),
                    (in_block_indent, "block-indent (<<<)"),
                    ]:
                if v:
                    parser.error("ERROR input ended inside %s" % s, '', '')

            parser.endDoc()
            break

        # --- checks that depend on the PREVIOUS token ---

        # if just ended a line, and inside a line-indent, and NOT starting
        # a new line-indent, end indented section
        if last_token[0] == 'EOLS' and in_line_indent:
            if tok.type != 'LINE_INDENT':
                # close all nested blocks
                while in_line_indent:
                    parser.endLineIndent()
                    in_line_indent -= 1

        # if just ended a line, and inside a definition list, and NOT
        # starting a new definition item (or there were multiple EOLs),
        # end list
        if last_token[0] == 'EOLS' and in_deflist:
            if tok.type not in ['D_TERM', 'D_DEFINITION'] or len(last_token[1]) > 1:
                parser.endDefinitionList()
                in_deflist = 0

        # if just saw TABLEROW_END or TABLEROW_CAPTION and next token not
        # TABLEROW_CAPTION or TABLEROW_START, then end table
        if self.in_table and last_token[0] in ['TABLEROW_END', 'TABLEROW_CAPTION'] and \
                tok.type not in ['TABLEROW_CAPTION', 'TABLEROW_START']:
            if self.in_tablecell:
                parser.endTableCell()
                self.in_tablecell = 0

            if self.in_tablerow:
                parser.endTableRow()
                self.in_tablerow = 0

            parser.endTable()
            self.in_table = 0

        # if just ended a line, and inside a listitem, then check next
        # token. if not a listitem (or there were multiple EOLs), close all
        # currently opened lists
        if last_token[0] == "EOLS" and list_stack[-1][1] >= 1:
            if tok.type not in ['N_LISTITEM', 'U_LISTITEM'] or len(last_token[1]) > 1:
                close_all_lists()

        # --- dispatch on the current token ---
        # NOTE: branches that 'continue' skip the last_token update at the
        # bottom of the loop.

        if tok.type == 'TEXT':
            parser.characters(tok.value)

        elif tok.type == 'BOLD':
            if in_bold:
                parser.endBold()
                in_bold = 0
            else:
                parser.beginBold()
                in_bold = 1

        elif tok.type == 'ITALIC':
            if in_italic:
                parser.endItalic()
                in_italic = 0
            else:
                parser.beginItalic()
                in_italic = 1

        elif tok.type == 'STRIKETHROUGH':
            if in_strikethrough:
                parser.endStrikethrough()
                in_strikethrough = 0
            else:
                parser.beginStrikethrough()
                in_strikethrough = 1

        elif tok.type == 'UNDERLINE':
            if in_underline:
                parser.endUnderline()
                in_underline = 0
            else:
                parser.beginUnderline()
                in_underline = 1

        elif tok.type == 'SUPERSCRIPT':
            if in_superscript:
                parser.endSuperscript()
                in_superscript = 0
            else:
                parser.beginSuperscript()
                in_superscript = 1

        elif tok.type == 'SUBSCRIPT':
            if in_subscript:
                parser.endSubscript()
                in_subscript = 0
            else:
                parser.beginSubscript()
                in_subscript = 1

        elif tok.type == 'HIGHLIGHT_DEFAULT':
            # can be end of any other "@@" style, or the start of the default style
            if in_highlight:
                parser.endHighlight()
                in_highlight = 0
            else:
                # begin default highlight style
                parser.beginHighlight()
                in_highlight = 1

        elif tok.type in ['HIGHLIGHT_CSS', 'HIGHLIGHT_COLOR', 'HIGHLIGHT_BG']:
            if in_highlight:
                # the '@@' is the end of the highlight - reparse remainder
                txt = self.lexer.lexdata[self.lexer.lexpos:]
                self.lexer.input(tok.value[2:] + txt)
                parser.endHighlight()
                in_highlight = 0
            else:
                # send style to parser so it knows what kind of element
                # to create
                parser.beginHighlight(tok.value)
                in_highlight = 1

        elif tok.type == 'BLOCK_INDENT':
            if in_block_indent:
                parser.endBlockIndent()
                in_block_indent = 0
            else:
                parser.beginBlockIndent()
                in_block_indent = 1

        elif tok.type == 'LINE_INDENT':
            # get >> chars
            m = re.match(self.t_LINE_INDENT, tok.value)
            # adjust to the new nesting level
            nr = len(m.group(1))
            while nr > in_line_indent:
                parser.beginLineIndent()
                in_line_indent += 1

            while nr < in_line_indent:
                parser.endLineIndent()
                in_line_indent -= 1

        elif tok.type == 'HTML_ESCAPE':
            m = re.match(self.t_HTML_ESCAPE, tok.value, re.M | re.I | re.S)
            parser.beginRawHTML()
            parser.characters(m.group(1))
            parser.endRawHTML()

        elif tok.type == 'WIKI_ESCAPE':
            m = re.match(self.t_WIKI_ESCAPE, tok.value, re.M | re.I | re.S)
            # <nowiki> gets its own Text type to prevent camelwording
            parser.beginNoWiki()
            parser.characters(m.group(1))
            parser.endNoWiki()

        elif tok.type == 'D_TERM':
            if not in_deflist:
                parser.beginDefinitionList()
                in_deflist = 1

            parser.beginDefinitionTerm()
            in_defterm = 1

        elif tok.type == 'D_DEFINITION':
            if not in_deflist:
                parser.beginDefinitionList()
                in_deflist = 1

            parser.beginDefinitionDef()
            in_defdef = 1

        elif tok.type == 'N_LISTITEM':
            open_list_item('N', tok.value)

        elif tok.type == 'U_LISTITEM':
            open_list_item('U', tok.value)

        elif tok.type == 'HEADING':
            # inside a table, this is a regular char (so parser can see it
            # and know to switch to <th>, etc.)
            if self.in_table:
                parser.characters(tok.rawtext)
                continue

            parser.beginHeading(len(tok.value))
            in_heading = 1

        elif tok.type == 'LINK_AB':
            parser.handleLink(tok.value[0], tok.value[1])

        elif tok.type == 'LINK_A':
            parser.handleLink(tok.value)

        elif tok.type in ['IMGLINK_TFU', 'IMGLINK_TF', 'IMGLINK_FU', 'IMGLINK_F']:
            parser.handleImgLink(*tok.value)

        elif tok.type == 'CSS_BLOCK_START':
            m = re.match(self.t_CSS_BLOCK_START, tok.value, re.M | re.S | re.I)
            name = m.group(1)
            # push on stack
            css_stack.append(name)
            # inform parser
            parser.beginCSSBlock(name)

        elif tok.type == 'CSS_BLOCK_END':
            if len(css_stack):
                # pop name and inform parser
                css_stack.pop()
                parser.endCSSBlock()
            else:
                # regular chars outside of a CSS block
                parser.characters(tok.value)

        elif tok.type == 'C_COMMENT_START':
            if self.in_strip_ccomment:
                # already in C-comment, treat as normal chars
                parser.characters(tok.value)
            else:
                # begin C-comment (strip comment markers)
                self.in_strip_ccomment = 1

        elif tok.type == 'HTML_COMMENT_START':
            if in_html_comment:
                # already in HTML comment, treat as normal chars
                parser.characters(tok.value)
            else:
                # begin HTML comment (strip comment markers)
                in_html_comment = 1

        elif tok.type == 'HTML_COMMENT_END':
            if not in_html_comment:
                # not in HTML-comment, treat as normal chars
                parser.characters(tok.value)
            else:
                # strip end markers
                in_html_comment = 0

        elif tok.type in ('CODE_BLOCK', 'CODE_BLOCK_CSS',
                          'CODE_BLOCK_CPP', 'CODE_BLOCK_HTML'):
            # regex grabs entire block since no nesting allowed; each token
            # type is re-matched with its own pattern, self.t_<TYPE>
            m = re.match(getattr(self, 't_' + tok.type), tok.value, re.M | re.I | re.S)
            text = m.group(1)
            self.handle_codeblock(parser, text)

        elif tok.type == 'TABLEROW_START':
            if not self.in_table:
                parser.beginTable()
                self.in_table = 1

            parser.beginTableRow()
            self.in_tablerow = 1
            parser.beginTableCell()
            self.in_tablecell = 1

        elif tok.type == 'TABLEROW_END':
            if not self.in_table:
                # split | portion from "\n" portion
                m = re.match(self.t_TABLEROW_END, tok.value, re.M | re.I | re.S)
                parser.characters(m.group(1))
                # feed \n back to the lexer
                txt = self.lexer.lexdata[self.lexer.lexpos:]
                self.lexer.input('\n' + txt)
            else:
                parser.endTableCell()
                self.in_tablecell = 0
                parser.endTableRow()
                self.in_tablerow = 0

        elif tok.type == 'TABLE_END':
            if not self.in_table:
                # split | portion from "\n" portion
                m = re.match(self.t_TABLE_END, tok.value, re.M | re.I | re.S)
                parser.characters(m.group(1))
                # feed \n's back to the lexer
                txt = self.lexer.lexdata[self.lexer.lexpos:]
                self.lexer.input(m.group(2) + txt)
            else:
                parser.endTableCell()
                self.in_tablecell = 0
                parser.endTableRow()
                self.in_tablerow = 0
                parser.endTable()
                self.in_table = 0

        elif tok.type == 'TABLEROW_CAPTION':
            # watch for caption as first row of table
            if not self.in_table:
                parser.beginTable()
                self.in_table = 1

            m = re.match(self.t_TABLEROW_CAPTION, tok.value, re.M | re.I | re.S)
            parser.setTableCaption(m.group(1))

            txt = self.lexer.lexdata[self.lexer.lexpos:]

            # have to check for table ending since the token grabbed the \n
            if re.match(r"[\t ]*[\n]", txt):
                parser.endTable()
                self.in_table = 0

        elif tok.type == 'PIPECHAR':
            if self.in_table:
                parser.endTableCell()

                # Start next cell UNLESS this is the end of the buffer.
                # Prevents having a false empty cell at the end of the
                # table if the row ends in EOF
                txt = self.lexer.lexdata[self.lexer.lexpos:]
                if not only_spaces(txt):
                    parser.beginTableCell()
                else:
                    self.in_tablecell = 0

            else:
                parser.characters(tok.value)

        elif tok.type == 'SEPARATOR':
            parser.separator()

        elif tok.type == 'CATCH_URL':
            # turn bare URL into link like: [[URL|URL]]
            parser.handleLink(tok.value, tok.value)

        elif tok.type == 'NULLDOT':
            pass  # nothing

        elif tok.type == 'XHTML_ENTITY':
            s = tok.value
            if s[-1] == ';':  # remove ; if present
                addsemi = u';'  # remember to add back (below), if needed
                s = s[:-1]
            else:
                addsemi = u''

            s = s[1:]  # strip &

            if s == '#DeleteMe':
                continue

            # numeric entity: hex (&#xNN;) or decimal (&#NN;). Patterns are
            # anchored so a malformed entity falls through to raw text
            # instead of being partially decoded.
            codepoint = None
            m = re.match(r'\#x([0-9a-f]+)$', s, re.M | re.I | re.S)
            if m:
                if m.group(1) in ['200b', '200B']:
                    # zero-width space is special - pass to XML layer
                    parser.characters('​')
                    continue

                codepoint = hex2int(m.group(1))
            else:
                m = re.match(r'\#([0-9]+)$', s, re.M | re.I | re.S)
                if m:
                    codepoint = int(m.group(1))

            if codepoint is not None:
                try:
                    char = unichr(codepoint)
                except (ValueError, OverflowError):
                    # not a code point unichr() can represent; emit the
                    # entity as literal text below
                    char = None

                if char is not None:
                    parser.characters(char)
                    continue

            # see if name defined in htmlentitydefs
            import htmlentitydefs as hed
            if s in hed.name2codepoint:
                parser.characters(unichr(hed.name2codepoint[s]))
            else:
                # else, return as raw text (will be escaped in final output)
                parser.characters(u'&' + s + addsemi)

        elif tok.type == 'DASH':
            parser.dash()

        elif tok.type == 'PYTHON_EMBED':
            if self.wcontext.restricted_mode:
                self.wcontext.parser.error(
                    "Not allowed to define macros in Safe Mode",
                    tok.rawtext, '')
            else:
                parser.beginPyCode()
                parser.characters(tok.value)
                parser.endPyCode()

        elif tok.type == "HTML_BREAK":
            parser.linebreak()

        elif tok.type == 'EOLS':
            # Do NOT handle lists here - they have complex nesting rules so
            # must be handled separately (above)
            if in_heading:
                parser.endHeading()
                in_heading = 0

            parser.EOLs(tok.value)

            if in_defterm:
                parser.endDefinitionTerm()
                in_defterm = 0

            if in_defdef:
                parser.endDefinitionDef()
                in_defdef = 0

        # remember for next pass
        last_token = (tok.type, tok.value)
def _append_text_macro_arg(macrocall, argtext):
    """Append argtext to macrocall as a new <TextMacroArg> subnode."""
    # NOTE: the special 'TextMacroArg' tag (instead of plain 'Text') is used
    # to catch any macro args that aren't processed by the macro, i.e. if a
    # 'TextMacroArg' tag makes it through to the HTML writer, something is wrong.
    elem = Element('TextMacroArg')
    elem.text = argtext
    macrocall.append(elem)


def parse_macro_call(text):
    r"""
    Parse a macro call <<text text ...>>

    The input text must begin with '<<'. Parser will read up to
    and including the closing '>>'.

    Returns:
        (macrocall, txt_remainder)

    Where:
        * macrocall: Element('MacroCall') with macro args stored as subnodes.
          The macro name is parsed like any other arg, so it is the first
          subnode. Each subnode will be either:
             a. <TextMacroArg> - a literal text arg
             b. <MacroCall> - an inner macro call
        * txt_remainder = Text after macro call. Whitespace (space, tab,
          newline) directly after the closing '>>' is normalized: it is
          dropped entirely if another macro call or a '/%' comment follows,
          else collapsed to a blank line, a single newline or a single space,
          depending on what was found.

    Handles:
        * Unquoted args (args delimited by whitespace)
        * Args quoted with ', ", triple-' or triple-" (triple quotes are
          a WikklyText extension)
        * Args quoted with <quote> ... </quote> (WikklyText extension)
        * Python-style escapes inside of quoted strings:
            \a \b \f \n \r \t \v \' \" \\
            \xHH (hex, 1-2 chars)
            \NNN (octal, 1-3 chars)
            \uHHHH unicode escape (only 4-digit unicode is supported on all Pythons)
            \{ANYCHAR} = ANYCHAR is passed through, if not in above list

          There is no need for the Python-style "\"+newline continuation since
          quoted strings are allowed to span lines. The [\r\n] chars are saved as-is.
        * Linebreaks preserved inside quoted strings (outside of
          quoted strings, they are delimiters)
        * Allows string concatenation: 'aaa'bbb"ccc"ddd -> 'aaabbbcccddd'
        * Preserves empty args ('' and "")

    Raises WikError on:
        * Unterminated quotes.
        * Unterminated "\" inside a quote.
        * No closing >>
        * Error calling inner macro.
    """
    # sanity check
    if text[:2] != '<<':
        # should never happen, so let it flow to top level
        raise Exception("Not a macro call.")

    in_arg = 0  # inside an arg
    in_quotechar = None  # which quote I'm inside (', ", ''', """, <quote>, or None)
    i = 2

    # args are stored here as subnodes
    macrocall = Element('MacroCall')

    out = u''  # current chunk of text
    while i < len(text):
        if in_quotechar is None:
            # check for end of macro
            if text[i:i + 2] == '>>':
                break

            # look for nested macro call (only slice the tail for the regex
            # when the cheap prefix test already matched)
            if text.startswith('<<', i) and re.match('<<[a-z]', text[i:], re.I):
                # if I have a partial arg, close before opening new macro call
                if in_arg:
                    _append_text_macro_arg(macrocall, out)
                    out = u''
                    in_arg = 0

                # parse inner macro and add it as an arg
                inner_macrocall, txt_remainder = parse_macro_call(text[i:])
                macrocall.append(inner_macrocall)

                # continue scanning in whatever the inner call left over
                i = 0
                text = txt_remainder
                continue  # skip +=1 at bottom for clarity here

        ch = text[i]

        if ch == '\\':
            in_arg = 1
            if in_quotechar:
                try:
                    c, skip = char_escape(text[i + 1:])
                except IndexError:
                    # string ended with "\" inside a quoted string
                    raise WikError(
                        "Macro statement ended inside unterminated quoted string",
                        text[:80], '', text[i:])
                out += c
                i += skip  # skip extra char(s)
            else:
                out += u'\\'  # '\' is a regular char outside of a quoted string

        elif ch in '\t \r\n':
            if not in_arg:
                i += 1  # skip whitespace outside of quoted strings
                continue

            if in_quotechar:
                out += ch  # preserve whitespace inside a quoted string
            else:
                # end of arg - convert to Element and store
                _append_text_macro_arg(macrocall, out)
                out = u''
                in_arg = 0

        # look for triple quotes
        elif text[i:i + 3] in ["'''", '"""']:
            in_arg = 1
            if in_quotechar == text[i:i + 3]:  # end of quote?
                # don't end arg - wait for whitespace break. this gives
                # concatenation behaviour like:
                #     "aaa"bbb'ccc'ddd ==> 'aaabbbcccddd'
                in_quotechar = None
            elif in_quotechar:
                out += text[i:i + 3]  # regular text inside of another quote
            else:
                in_quotechar = text[i:i + 3]  # begin quote

            # skip two extra chars
            i += 2

        # look for single-quotes
        elif ch in ["'", '"']:
            in_arg = 1
            if in_quotechar == ch:  # end of quote?
                # don't end arg - wait for whitespace break (see above)
                in_quotechar = None
            elif in_quotechar:
                out += ch  # regular char inside another quote
            else:
                in_quotechar = ch  # begin quote

        # look for <quote>
        elif text[i:i + 7] == '<quote>':
            in_arg = 1
            if in_quotechar:
                out += text[i:i + 7]  # regular text inside another quote
            else:
                in_quotechar = text[i:i + 7]  # begin quote

            # skip 6 extra chars
            i += 6

        # look for </quote>
        elif text[i:i + 8] == '</quote>':
            in_arg = 1
            if in_quotechar == '<quote>':  # end of quote?
                # don't end arg - wait for whitespace break (see above)
                in_quotechar = None
            else:
                out += text[i:i + 8]  # regular text

            # skip 7 extra chars
            i += 7

        else:
            in_arg = 1
            out += ch  # regular char

        i += 1

    # While a quote is open, '>>' is not treated as the terminator, so an
    # unterminated quote always runs the loop off the end of the text. Check
    # for that BEFORE the generic "no closing >>" test, otherwise this more
    # specific message can never be reported.
    # the user might not have intended this, so let them know ...
    if in_quotechar:
        raise WikError(
            "Macro statement ended inside a quoted string (current quote=%s)" % in_quotechar,
            text[:80], '', text[i:])

    if text[i:i + 2] != '>>':
        raise WikError("No closing >> in macro statement", text[:80], '', text[i:])

    # finish final partial arg if any
    if in_arg:
        _append_text_macro_arg(macrocall, out)

    # skip ending '>>'
    i += 2

    # collect all whitespace after this point
    k = i
    while k < len(text) and text[k] in ' \t\n':
        k += 1

    trailing = text[i:k]
    rest = text[k:]

    # if there is another macro call or a comment (i.e. non-visible markup),
    # remove all intervening whitespace.
    # else, restore either a blank line, a single \n or a single space
    # depending on what I found.
    if re.match(r'<<[a-z_]+', rest) or re.match(r'/%', rest):
        remainder = rest
    elif '\n\n' in trailing:  # check before '\n'
        remainder = u'\n\n' + rest
    elif '\n' in trailing:
        remainder = u'\n' + rest
    elif len(trailing):
        remainder = u' ' + rest
    else:
        remainder = rest

    return (macrocall, remainder)
def WikklyText_to_XML(content, encoding, safe_mode, setvars=None,
                      max_runtime=-1, url_resolver=None, tree_posthook=None,
                      plugin_dirs=None, rendercache=None, macro_handler=None,
                      **kwargs):
    """
    Convert the given wikitext to XML.

    |>|!Inputs|
    |content\
        |Wikitext (//unicode//), usually from {{tt{wikklytext.base.load_wikitext()}}}|
    |encoding\
        |Desired output encoding (i.e. {{{'utf-8'}}})|
    |safe_mode\
        |True/False, whether to use Safe mode.|
    |setvars\
        |Variables to set into ~WikContext, as dict of:<br>{{{name: <str, unicode or int>}}}<br> \
        where 'name' can have a leading '$' to set sysvars.|
    |max_runtime\
        |Maximum time (in seconds) to run, or -1 for unlimited.|
    |url_resolver\
        |URL resolver. Must be a callable like [[default_URL_resolver()|#FUNC_default_URL_resolver]]. \
        If None, a default resolver will be used.|
    |tree_posthook\
        |Hook to call after ~ElementTree is complete, before generating XML. Will be called as: <br>\
        {{{tree_posthook(rootnode, context)}}} <br> Hook should modify tree in-place.|
    |plugin_dirs\
        |Paths to search for plugins. Can be a list of strings, a single string, or None.|
    |rendercache\
        |Instance of class in wikklytext.cache to perform caching.|
    |macro_handler\
        |Caller-defined macro handler. Will be called as:<br>\
        {{{ handled, result = macro_handler(name, context, *elements) }}}\
        Returns:\
        * ''handled'': True/False if macro call was handled.
        * ''result'' is the macro return value, ready to be processed \
        by ''macro.process_macro_result''().|
    |kwargs\
        |//Undocumented// -- for backward compatibility with earlier keyword args. Do not \
        pass this directly.|
    |>|!Returns: {{{(xml, context)}}} |
    |xml\
        |Generated XML as an encoded bytestring|
    |context\
        |~WikContext that was used, in case user wants to inspect it.|
    """
    # imported here rather than at module level, as in the original code
    from wikklytext.base import WikContext
    from wikklytext.eval import eval_wiki_elem

    # NOTE(review): 'encoding' is accepted but never referenced in this
    # function body -- confirm where the output encoding is actually applied.

    if not setvars:
        setvars = {}

    if 'plugin_dir' in kwargs:
        # in 1.4.0 this was named 'plugin_dir' so accept if plugin_dirs not given
        plugin_dirs = plugin_dirs or kwargs.get('plugin_dir', None)
        deprecation(
            "Change 'plugin_dir' to 'plugin_dirs' in args to WikklyText_to_XML()"
        )

    wcontext = WikContext(restricted_mode=safe_mode,
                          max_runtime=max_runtime,
                          url_resolver=url_resolver,
                          plugin_dirs=plugin_dirs,
                          rendercache=rendercache,
                          macro_handler=macro_handler)

    # copy caller-supplied variables into the context: text values via
    # var_set_text(), ints via var_set_int(), anything else is rejected
    for varname, varvalue in setvars.items():
        if isinstance(varvalue, (str, unicode)):
            wcontext.var_set_text(varname, varvalue)
            continue

        if not isinstance(varvalue, int):
            raise WikError("Bad value in setvars")

        wcontext.var_set_int(varname, varvalue)

    # parse to a tree, then evaluate it
    root = eval_wiki_elem(wcontext, WikklyText_to_Tree(wcontext, content))

    # add the errors collected by the context's parser to the tree's ErrorsList
    errors_node = root.find('ErrorsList')
    for err in wcontext.parser.getErrors():
        errors_node.append(err)

    # give the caller a chance to postprocess the tree before making XML
    if tree_posthook is not None:
        tree_posthook(root, wcontext)

    return (dumpxml(root), wcontext)