def test_unirange(self): first_non_bmp = u'\U00010000' r = re.compile(util.unirange(0x10000, 0x20000)) m = r.match(first_non_bmp) self.assertTrue(m) self.assertEquals(m.end(), len(first_non_bmp)) self.assertFalse(r.match(u'\uffff')) self.assertFalse(r.match(u'xxx')) # Tests that end is inclusive r = re.compile(util.unirange(0x10000, 0x10000) + '+') # Tests that the plus works for the entire unicode point, if narrow # build m = r.match(first_non_bmp * 2) self.assertTrue(m) self.assertEquals(m.end(), len(first_non_bmp) * 2)
def test_unirange(self): first_non_bmp = u'\U00010000' r = re.compile(util.unirange(0x10000, 0x20000)) m = r.match(first_non_bmp) self.assertTrue(m) self.assertEqual(m.end(), len(first_non_bmp)) self.assertFalse(r.match(u'\uffff')) self.assertFalse(r.match(u'xxx')) # Tests that end is inclusive r = re.compile(util.unirange(0x10000, 0x10000) + '+') # Tests that the plus works for the entire unicode point, if narrow # build m = r.match(first_non_bmp * 2) self.assertTrue(m) self.assertEqual(m.end(), len(first_non_bmp) * 2)
def test_unirange(): first_non_bmp = u'\U00010000' r = re.compile(util.unirange(0x10000, 0x20000)) m = r.match(first_non_bmp) assert m assert m.end() == len(first_non_bmp) assert not r.match(u'\uffff') assert not r.match(u'xxx') # Tests that end is inclusive r = re.compile(util.unirange(0x10000, 0x10000) + '+') # Tests that the plus works for the entire unicode point, if narrow # build m = r.match(first_non_bmp * 2) assert m assert m.end() == len(first_non_bmp) * 2
class XQueryLexer(ExtendedRegexLexer): """ An XQuery lexer, parsing a stream and outputting the tokens needed to highlight xquery code. .. versionadded:: 1.4 """ name = 'XQuery' aliases = ['xquery', 'xqy', 'xq', 'xql', 'xqm'] filenames = ['*.xqy', '*.xquery', '*.xq', '*.xql', '*.xqm'] mimetypes = ['text/xquery', 'application/xquery'] xquery_parse_state = [] # FIX UNICODE LATER # ncnamestartchar = ( # ur"[A-Z]|_|[a-z]|[\u00C0-\u00D6]|[\u00D8-\u00F6]|[\u00F8-\u02FF]|" # ur"[\u0370-\u037D]|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]|" # ur"[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]|" # ur"[\u10000-\uEFFFF]" # ) ncnamestartchar = r"(?:[A-Z]|_|[a-z])" # FIX UNICODE LATER # ncnamechar = ncnamestartchar + (ur"|-|\.|[0-9]|\u00B7|[\u0300-\u036F]|" # ur"[\u203F-\u2040]") ncnamechar = r"(?:" + ncnamestartchar + r"|-|\.|[0-9])" ncname = "(?:%s+%s*)" % (ncnamestartchar, ncnamechar) pitarget_namestartchar = r"(?:[A-KN-WYZ]|_|:|[a-kn-wyz])" pitarget_namechar = r"(?:" + pitarget_namestartchar + r"|-|\.|[0-9])" pitarget = "%s+%s*" % (pitarget_namestartchar, pitarget_namechar) prefixedname = "%s:%s" % (ncname, ncname) unprefixedname = ncname qname = "(?:%s|%s)" % (prefixedname, unprefixedname) entityref = r'(?:&(?:lt|gt|amp|quot|apos|nbsp);)' charref = r'(?:&#[0-9]+;|&#x[0-9a-fA-F]+;)' stringdouble = r'(?:"(?:' + entityref + r'|' + charref + r'|""|[^&"])*")' stringsingle = r"(?:'(?:" + entityref + r"|" + charref + r"|''|[^&'])*')" # FIX UNICODE LATER # elementcontentchar = (ur'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|' # ur'[\u003d-\u007a]|\u007c|[\u007e-\u007F]') elementcontentchar = r'[A-Za-z]|\s|\d|[!"#$%()*+,\-./:;=?@\[\\\]^_\'`|~]' # quotattrcontentchar = (ur'\t|\r|\n|[\u0020-\u0021]|[\u0023-\u0025]|' # ur'[\u0027-\u003b]|[\u003d-\u007a]|\u007c|[\u007e-\u007F]') quotattrcontentchar = r'[A-Za-z]|\s|\d|[!#$%()*+,\-./:;=?@\[\\\]^_\'`|~]' # aposattrcontentchar = (ur'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|' # ur'[\u003d-\u007a]|\u007c|[\u007e-\u007F]') aposattrcontentchar = r'[A-Za-z]|\s|\d|[!"#$%()*+,\-./:;=?@\[\\\]^_`|~]' # CHAR elements - fix the above elementcontentchar, quotattrcontentchar, # aposattrcontentchar # x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] flags = re.DOTALL | re.MULTILINE | re.UNICODE def punctuation_root_callback(lexer, match, ctx): yield match.start(), Punctuation, match.group(1) # transition to root always - don't pop off stack ctx.stack = ['root'] ctx.pos = match.end() def operator_root_callback(lexer, match, ctx): yield match.start(), Operator, match.group(1) # transition to root always - don't pop off stack ctx.stack = ['root'] ctx.pos = match.end() def popstate_tag_callback(lexer, match, ctx): yield match.start(), Name.Tag, match.group(1) ctx.stack.append(lexer.xquery_parse_state.pop()) ctx.pos = match.end() def popstate_xmlcomment_callback(lexer, match, ctx): yield match.start(), String.Doc, match.group(1) ctx.stack.append(lexer.xquery_parse_state.pop()) ctx.pos = match.end() def popstate_kindtest_callback(lexer, match, ctx): yield match.start(), Punctuation, match.group(1) next_state = lexer.xquery_parse_state.pop() if next_state == 'occurrenceindicator': if re.match("[?*+]+", match.group(2)): yield match.start(), Punctuation, match.group(2) ctx.stack.append('operator') ctx.pos = match.end() else: ctx.stack.append('operator') ctx.pos = match.end(1) else: ctx.stack.append(next_state) ctx.pos = match.end(1) def popstate_callback(lexer, match, ctx): yield match.start(), Punctuation, match.group(1) # if we have run out of our state stack, pop whatever is on the pygments # state stack if len(lexer.xquery_parse_state) == 0: ctx.stack.pop() elif len(ctx.stack) > 1: ctx.stack.append(lexer.xquery_parse_state.pop()) else: # i don't know if i'll need this, but in case, default back to root ctx.stack = ['root'] ctx.pos = match.end() def pushstate_element_content_starttag_callback(lexer, match, ctx): yield match.start(), Name.Tag, match.group(1) lexer.xquery_parse_state.append('element_content') ctx.stack.append('start_tag') ctx.pos = match.end() def pushstate_cdata_section_callback(lexer, match, ctx): yield match.start(), String.Doc, match.group(1) ctx.stack.append('cdata_section') lexer.xquery_parse_state.append(ctx.state.pop) ctx.pos = match.end() def pushstate_starttag_callback(lexer, match, ctx): yield match.start(), Name.Tag, match.group(1) lexer.xquery_parse_state.append(ctx.state.pop) ctx.stack.append('start_tag') ctx.pos = match.end() def pushstate_operator_order_callback(lexer, match, ctx): yield match.start(), Keyword, match.group(1) yield match.start(), Text, match.group(2) yield match.start(), Punctuation, match.group(3) ctx.stack = ['root'] lexer.xquery_parse_state.append('operator') ctx.pos = match.end() def pushstate_operator_map_callback(lexer, match, ctx): yield match.start(), Keyword, match.group(1) yield match.start(), Text, match.group(2) yield match.start(), Punctuation, match.group(3) ctx.stack = ['root'] lexer.xquery_parse_state.append('operator') ctx.pos = match.end() def pushstate_operator_root_validate(lexer, match, ctx): yield match.start(), Keyword, match.group(1) yield match.start(), Text, match.group(2) yield match.start(), Punctuation, match.group(3) ctx.stack = ['root'] lexer.xquery_parse_state.append('operator') ctx.pos = match.end() def pushstate_operator_root_validate_withmode(lexer, match, ctx): yield match.start(), Keyword, match.group(1) yield match.start(), Text, match.group(2) yield match.start(), Keyword, match.group(3) ctx.stack = ['root'] lexer.xquery_parse_state.append('operator') ctx.pos = match.end() def pushstate_operator_processing_instruction_callback(lexer, match, ctx): yield match.start(), String.Doc, match.group(1) ctx.stack.append('processing_instruction') lexer.xquery_parse_state.append('operator') ctx.pos = match.end() def pushstate_element_content_processing_instruction_callback(lexer, match, ctx): yield match.start(), String.Doc, match.group(1) ctx.stack.append('processing_instruction') lexer.xquery_parse_state.append('element_content') ctx.pos = match.end() def pushstate_element_content_cdata_section_callback(lexer, match, ctx): yield match.start(), String.Doc, match.group(1) ctx.stack.append('cdata_section') lexer.xquery_parse_state.append('element_content') ctx.pos = match.end() def pushstate_operator_cdata_section_callback(lexer, match, ctx): yield match.start(), String.Doc, match.group(1) ctx.stack.append('cdata_section') lexer.xquery_parse_state.append('operator') ctx.pos = match.end() def pushstate_element_content_xmlcomment_callback(lexer, match, ctx): yield match.start(), String.Doc, match.group(1) ctx.stack.append('xml_comment') lexer.xquery_parse_state.append('element_content') ctx.pos = match.end() def pushstate_operator_xmlcomment_callback(lexer, match, ctx): yield match.start(), String.Doc, match.group(1) ctx.stack.append('xml_comment') lexer.xquery_parse_state.append('operator') ctx.pos = match.end() def pushstate_kindtest_callback(lexer, match, ctx): yield match.start(), Keyword, match.group(1) yield match.start(), Text, match.group(2) yield match.start(), Punctuation, match.group(3) lexer.xquery_parse_state.append('kindtest') ctx.stack.append('kindtest') ctx.pos = match.end() def pushstate_operator_kindtestforpi_callback(lexer, match, ctx): yield match.start(), Keyword, match.group(1) yield match.start(), Text, match.group(2) yield match.start(), Punctuation, match.group(3) lexer.xquery_parse_state.append('operator') ctx.stack.append('kindtestforpi') ctx.pos = match.end() def pushstate_operator_kindtest_callback(lexer, match, ctx): yield match.start(), Keyword, match.group(1) yield match.start(), Text, match.group(2) yield match.start(), Punctuation, match.group(3) lexer.xquery_parse_state.append('operator') ctx.stack.append('kindtest') ctx.pos = match.end() def pushstate_occurrenceindicator_kindtest_callback(lexer, match, ctx): yield match.start(), Name.Tag, match.group(1) yield match.start(), Text, match.group(2) yield match.start(), Punctuation, match.group(3) lexer.xquery_parse_state.append('occurrenceindicator') ctx.stack.append('kindtest') ctx.pos = match.end() def pushstate_operator_starttag_callback(lexer, match, ctx): yield match.start(), Name.Tag, match.group(1) lexer.xquery_parse_state.append('operator') ctx.stack.append('start_tag') ctx.pos = match.end() def pushstate_operator_root_callback(lexer, match, ctx): yield match.start(), Punctuation, match.group(1) lexer.xquery_parse_state.append('operator') ctx.stack = ['root'] ctx.pos = match.end() def pushstate_operator_root_construct_callback(lexer, match, ctx): yield match.start(), Keyword, match.group(1) yield match.start(), Text, match.group(2) yield match.start(), Punctuation, match.group(3) lexer.xquery_parse_state.append('operator') ctx.stack = ['root'] ctx.pos = match.end() def pushstate_root_callback(lexer, match, ctx): yield match.start(), Punctuation, match.group(1) cur_state = ctx.stack.pop() lexer.xquery_parse_state.append(cur_state) ctx.stack = ['root'] ctx.pos = match.end() def pushstate_operator_attribute_callback(lexer, match, ctx): yield match.start(), Name.Attribute, match.group(1) ctx.stack.append('operator') ctx.pos = match.end() def pushstate_operator_callback(lexer, match, ctx): yield match.start(), Keyword, match.group(1) yield match.start(), Text, match.group(2) yield match.start(), Punctuation, match.group(3) lexer.xquery_parse_state.append('operator') ctx.pos = match.end() tokens = { 'comment': [ # xquery comments (r'(:\))', Comment, '#pop'), (r'(\(:)', Comment, '#push'), (r'[^:)]', Comment), (r'([^:)]|:|\))', Comment), ], 'whitespace': [ (r'\s+', Text), ], 'operator': [ include('whitespace'), (r'(\})', popstate_callback), (r'\(:', Comment, 'comment'), (r'(\{)', pushstate_root_callback), (r'then|else|external|at|div|except', Keyword, 'root'), (r'order by', Keyword, 'root'), (r'group by', Keyword, 'root'), (r'is|mod|order\s+by|stable\s+order\s+by', Keyword, 'root'), (r'and|or', Operator.Word, 'root'), (r'(eq|ge|gt|le|lt|ne|idiv|intersect|in)(?=\b)', Operator.Word, 'root'), (r'return|satisfies|to|union|where|count|preserve\s+strip', Keyword, 'root'), (r'(>=|>>|>|<=|<<|<|-|\*|!=|\+|\|\||\||:=|=|!)', operator_root_callback), (r'(::|:|;|\[|//|/|,)', punctuation_root_callback), (r'(castable|cast)(\s+)(as)\b', bygroups(Keyword, Text, Keyword), 'singletype'), (r'(instance)(\s+)(of)\b', bygroups(Keyword, Text, Keyword), 'itemtype'), (r'(treat)(\s+)(as)\b', bygroups(Keyword, Text, Keyword), 'itemtype'), (r'(case)(\s+)(' + stringdouble + ')', bygroups(Keyword, Text, String.Double), 'itemtype'), (r'(case)(\s+)(' + stringsingle + ')', bygroups(Keyword, Text, String.Single), 'itemtype'), (r'(case|as)\b', Keyword, 'itemtype'), (r'(\))(\s*)(as)', bygroups(Punctuation, Text, Keyword), 'itemtype'), (r'\$', Name.Variable, 'varname'), (r'(for|let|previous|next)(\s+)(\$)', bygroups(Keyword, Text, Name.Variable), 'varname'), (r'(for)(\s+)(tumbling|sliding)(\s+)(window)(\s+)(\$)', bygroups(Keyword, Text, Keyword, Text, Keyword, Text, Name.Variable), 'varname'), # (r'\)|\?|\]', Punctuation, '#push'), (r'\)|\?|\]', Punctuation), (r'(empty)(\s+)(greatest|least)', bygroups(Keyword, Text, Keyword)), (r'ascending|descending|default', Keyword, '#push'), (r'(allowing)(\s+)(empty)', bygroups(Keyword, Text, Keyword)), (r'external', Keyword), (r'(start|when|end)', Keyword, 'root'), (r'(only)(\s+)(end)', bygroups(Keyword, Text, Keyword), 'root'), (r'collation', Keyword, 'uritooperator'), # eXist specific XQUF (r'(into|following|preceding|with)', Keyword, 'root'), # support for current context on rhs of Simple Map Operator (r'\.', Operator), # finally catch all string literals and stay in operator state (stringdouble, String.Double), (stringsingle, String.Single), (r'(catch)(\s*)', bygroups(Keyword, Text), 'root'), ], 'uritooperator': [ (stringdouble, String.Double, '#pop'), (stringsingle, String.Single, '#pop'), ], 'namespacedecl': [ include('whitespace'), (r'\(:', Comment, 'comment'), (r'(at)(\s+)('+stringdouble+')', bygroups(Keyword, Text, String.Double)), (r"(at)(\s+)("+stringsingle+')', bygroups(Keyword, Text, String.Single)), (stringdouble, String.Double), (stringsingle, String.Single), (r',', Punctuation), (r'=', Operator), (r';', Punctuation, 'root'), (ncname, Name.Namespace), ], 'namespacekeyword': [ include('whitespace'), (r'\(:', Comment, 'comment'), (stringdouble, String.Double, 'namespacedecl'), (stringsingle, String.Single, 'namespacedecl'), (r'inherit|no-inherit', Keyword, 'root'), (r'namespace', Keyword, 'namespacedecl'), (r'(default)(\s+)(element)', bygroups(Keyword, Text, Keyword)), (r'preserve|no-preserve', Keyword), (r',', Punctuation), ], 'annotationname': [ (r'\(:', Comment, 'comment'), (qname, Name.Decorator), (r'(\()(' + stringdouble + ')', bygroups(Punctuation, String.Double)), (r'(\()(' + stringsingle + ')', bygroups(Punctuation, String.Single)), (r'(\,)(\s+)(' + stringdouble + ')', bygroups(Punctuation, Text, String.Double)), (r'(\,)(\s+)(' + stringsingle + ')', bygroups(Punctuation, Text, String.Single)), (r'\)', Punctuation), (r'(\s+)(\%)', bygroups(Text, Name.Decorator), 'annotationname'), (r'(\s+)(variable)(\s+)(\$)', bygroups(Text, Keyword.Declaration, Text, Name.Variable), 'varname'), (r'(\s+)(function)(\s+)', bygroups(Text, Keyword.Declaration, Text), 'root') ], 'varname': [ (r'\(:', Comment, 'comment'), (r'(' + qname + ')(\()?', bygroups(Name, Punctuation), 'operator'), ], 'singletype': [ include('whitespace'), (r'\(:', Comment, 'comment'), (ncname + r'(:\*)', Name.Variable, 'operator'), (qname, Name.Variable, 'operator'), ], 'itemtype': [ include('whitespace'), (r'\(:', Comment, 'comment'), (r'\$', Name.Variable, 'varname'), (r'(void)(\s*)(\()(\s*)(\))', bygroups(Keyword, Text, Punctuation, Text, Punctuation), 'operator'), (r'(element|attribute|schema-element|schema-attribute|comment|text|' r'node|binary|document-node|empty-sequence)(\s*)(\()', pushstate_occurrenceindicator_kindtest_callback), # Marklogic specific type? (r'(processing-instruction)(\s*)(\()', bygroups(Keyword, Text, Punctuation), ('occurrenceindicator', 'kindtestforpi')), (r'(item)(\s*)(\()(\s*)(\))(?=[*+?])', bygroups(Keyword, Text, Punctuation, Text, Punctuation), 'occurrenceindicator'), (r'(\(\#)(\s*)', bygroups(Punctuation, Text), 'pragma'), (r';', Punctuation, '#pop'), (r'then|else', Keyword, '#pop'), (r'(at)(\s+)(' + stringdouble + ')', bygroups(Keyword, Text, String.Double), 'namespacedecl'), (r'(at)(\s+)(' + stringsingle + ')', bygroups(Keyword, Text, String.Single), 'namespacedecl'), (r'except|intersect|in|is|return|satisfies|to|union|where|count', Keyword, 'root'), (r'and|div|eq|ge|gt|le|lt|ne|idiv|mod|or', Operator.Word, 'root'), (r':=|=|,|>=|>>|>|\[|\(|<=|<<|<|-|!=|\|\||\|', Operator, 'root'), (r'external|at', Keyword, 'root'), (r'(stable)(\s+)(order)(\s+)(by)', bygroups(Keyword, Text, Keyword, Text, Keyword), 'root'), (r'(castable|cast)(\s+)(as)', bygroups(Keyword, Text, Keyword), 'singletype'), (r'(treat)(\s+)(as)', bygroups(Keyword, Text, Keyword)), (r'(instance)(\s+)(of)', bygroups(Keyword, Text, Keyword)), (r'(case)(\s+)(' + stringdouble + ')', bygroups(Keyword, Text, String.Double), 'itemtype'), (r'(case)(\s+)(' + stringsingle + ')', bygroups(Keyword, Text, String.Single), 'itemtype'), (r'case|as', Keyword, 'itemtype'), (r'(\))(\s*)(as)', bygroups(Operator, Text, Keyword), 'itemtype'), (ncname + r':\*', Keyword.Type, 'operator'), (r'(function|map|array)(\()', bygroups(Keyword.Type, Punctuation)), (qname, Keyword.Type, 'occurrenceindicator'), ], 'kindtest': [ (r'\(:', Comment, 'comment'), (r'\{', Punctuation, 'root'), (r'(\))([*+?]?)', popstate_kindtest_callback), (r'\*', Name, 'closekindtest'), (qname, Name, 'closekindtest'), (r'(element|schema-element)(\s*)(\()', pushstate_kindtest_callback), ], 'kindtestforpi': [ (r'\(:', Comment, 'comment'), (r'\)', Punctuation, '#pop'), (ncname, Name.Variable), (stringdouble, String.Double), (stringsingle, String.Single), ], 'closekindtest': [ (r'\(:', Comment, 'comment'), (r'(\))', popstate_callback), (r',', Punctuation), (r'(\{)', pushstate_operator_root_callback), (r'\?', Punctuation), ], 'xml_comment': [ (r'(-->)', popstate_xmlcomment_callback), (r'[^-]{1,2}', Literal), (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' + unirange(0x10000, 0x10ffff), Literal), ], 'processing_instruction': [ (r'\s+', Text, 'processing_instruction_content'), (r'\?>', String.Doc, '#pop'), (pitarget, Name), ], 'processing_instruction_content': [ (r'\?>', String.Doc, '#pop'), (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' + unirange(0x10000, 0x10ffff), Literal), ], 'cdata_section': [ (r']]>', String.Doc, '#pop'), (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' + unirange(0x10000, 0x10ffff), Literal), ], 'start_tag': [ include('whitespace'), (r'(/>)', popstate_tag_callback), (r'>', Name.Tag, 'element_content'), (r'"', Punctuation, 'quot_attribute_content'), (r"'", Punctuation, 'apos_attribute_content'), (r'=', Operator), (qname, Name.Tag), ], 'quot_attribute_content': [ (r'"', Punctuation, 'start_tag'), (r'(\{)', pushstate_root_callback), (r'""', Name.Attribute), (quotattrcontentchar, Name.Attribute), (entityref, Name.Attribute), (charref, Name.Attribute), (r'\{\{|\}\}', Name.Attribute), ], 'apos_attribute_content': [ (r"'", Punctuation, 'start_tag'), (r'\{', Punctuation, 'root'), (r"''", Name.Attribute), (aposattrcontentchar, Name.Attribute), (entityref, Name.Attribute), (charref, Name.Attribute), (r'\{\{|\}\}', Name.Attribute), ], 'element_content': [ (r'</', Name.Tag, 'end_tag'), (r'(\{)', pushstate_root_callback), (r'(<!--)', pushstate_element_content_xmlcomment_callback), (r'(<\?)', pushstate_element_content_processing_instruction_callback), (r'(<!\[CDATA\[)', pushstate_element_content_cdata_section_callback), (r'(<)', pushstate_element_content_starttag_callback), (elementcontentchar, Literal), (entityref, Literal), (charref, Literal), (r'\{\{|\}\}', Literal), ], 'end_tag': [ include('whitespace'), (r'(>)', popstate_tag_callback), (qname, Name.Tag), ], 'xmlspace_decl': [ include('whitespace'), (r'\(:', Comment, 'comment'), (r'preserve|strip', Keyword, '#pop'), ], 'declareordering': [ (r'\(:', Comment, 'comment'), include('whitespace'), (r'ordered|unordered', Keyword, '#pop'), ], 'xqueryversion': [ include('whitespace'), (r'\(:', Comment, 'comment'), (stringdouble, String.Double), (stringsingle, String.Single), (r'encoding', Keyword), (r';', Punctuation, '#pop'), ], 'pragma': [ (qname, Name.Variable, 'pragmacontents'), ], 'pragmacontents': [ (r'#\)', Punctuation, 'operator'), (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' + unirange(0x10000, 0x10ffff), Literal), (r'(\s+)', Text), ], 'occurrenceindicator': [ include('whitespace'), (r'\(:', Comment, 'comment'), (r'\*|\?|\+', Operator, 'operator'), (r':=', Operator, 'root'), default('operator'), ], 'option': [ include('whitespace'), (qname, Name.Variable, '#pop'), ], 'qname_braren': [ include('whitespace'), (r'(\{)', pushstate_operator_root_callback), (r'(\()', Punctuation, 'root'), ], 'element_qname': [ (qname, Name.Variable, 'root'), ], 'attribute_qname': [ (qname, Name.Variable, 'root'), ], 'root': [ include('whitespace'), (r'\(:', Comment, 'comment'), # handle operator state # order on numbers matters - handle most complex first (r'\d+(\.\d*)?[eE][+-]?\d+', Number.Float, 'operator'), (r'(\.\d+)[eE][+-]?\d+', Number.Float, 'operator'), (r'(\.\d+|\d+\.\d*)', Number.Float, 'operator'), (r'(\d+)', Number.Integer, 'operator'), (r'(\.\.|\.|\))', Punctuation, 'operator'), (r'(declare)(\s+)(construction)', bygroups(Keyword.Declaration, Text, Keyword.Declaration), 'operator'), (r'(declare)(\s+)(default)(\s+)(order)', bygroups(Keyword.Declaration, Text, Keyword.Declaration, Text, Keyword.Declaration), 'operator'), (r'(declare)(\s+)(context)(\s+)(item)', bygroups(Keyword.Declaration, Text, Keyword.Declaration, Text, Keyword.Declaration), 'operator'), (ncname + ':\*', Name, 'operator'), ('\*:'+ncname, Name.Tag, 'operator'), ('\*', Name.Tag, 'operator'), (stringdouble, String.Double, 'operator'), (stringsingle, String.Single, 'operator'), (r'(\}|\])', popstate_callback), # NAMESPACE DECL (r'(declare)(\s+)(default)(\s+)(collation)', bygroups(Keyword.Declaration, Text, Keyword.Declaration, Text, Keyword.Declaration)), (r'(module|declare)(\s+)(namespace)', bygroups(Keyword.Declaration, Text, Keyword.Declaration), 'namespacedecl'), (r'(declare)(\s+)(base-uri)', bygroups(Keyword.Declaration, Text, Keyword.Declaration), 'namespacedecl'), # NAMESPACE KEYWORD (r'(declare)(\s+)(default)(\s+)(element|function)', bygroups(Keyword.Declaration, Text, Keyword.Declaration, Text, Keyword.Declaration), 'namespacekeyword'), (r'(import)(\s+)(schema|module)', bygroups(Keyword.Pseudo, Text, Keyword.Pseudo), 'namespacekeyword'), (r'(declare)(\s+)(copy-namespaces)', bygroups(Keyword.Declaration, Text, Keyword.Declaration), 'namespacekeyword'), # VARNAMEs (r'(for|let|some|every)(\s+)(\$)', bygroups(Keyword, Text, Name.Variable), 'varname'), (r'(for)(\s+)(tumbling|sliding)(\s+)(window)(\s+)(\$)', bygroups(Keyword, Text, Keyword, Text, Keyword, Text, Name.Variable), 'varname'), (r'\$', Name.Variable, 'varname'), (r'(declare)(\s+)(variable)(\s+)(\$)', bygroups(Keyword.Declaration, Text, Keyword.Declaration, Text, Name.Variable), 'varname'), # ANNOTATED GLOBAL VARIABLES AND FUNCTIONS (r'(declare)(\s+)(\%)', bygroups(Keyword.Declaration, Text, Name.Decorator), 'annotationname'), # ITEMTYPE (r'(\))(\s+)(as)', bygroups(Operator, Text, Keyword), 'itemtype'), (r'(element|attribute|schema-element|schema-attribute|comment|' r'text|node|document-node|empty-sequence)(\s+)(\()', pushstate_operator_kindtest_callback), (r'(processing-instruction)(\s+)(\()', pushstate_operator_kindtestforpi_callback), (r'(<!--)', pushstate_operator_xmlcomment_callback), (r'(<\?)', pushstate_operator_processing_instruction_callback), (r'(<!\[CDATA\[)', pushstate_operator_cdata_section_callback), # (r'</', Name.Tag, 'end_tag'), (r'(<)', pushstate_operator_starttag_callback), (r'(declare)(\s+)(boundary-space)', bygroups(Keyword.Declaration, Text, Keyword.Declaration), 'xmlspace_decl'), (r'(validate)(\s+)(lax|strict)', pushstate_operator_root_validate_withmode), (r'(validate)(\s*)(\{)', pushstate_operator_root_validate), (r'(typeswitch)(\s*)(\()', bygroups(Keyword, Text, Punctuation)), (r'(switch)(\s*)(\()', bygroups(Keyword, Text, Punctuation)), (r'(element|attribute|namespace)(\s*)(\{)', pushstate_operator_root_construct_callback), (r'(document|text|processing-instruction|comment)(\s*)(\{)', pushstate_operator_root_construct_callback), # ATTRIBUTE (r'(attribute)(\s+)(?=' + qname + r')', bygroups(Keyword, Text), 'attribute_qname'), # ELEMENT (r'(element)(\s+)(?=' + qname + r')', bygroups(Keyword, Text), 'element_qname'), # PROCESSING_INSTRUCTION (r'(processing-instruction|namespace)(\s+)(' + ncname + r')(\s*)(\{)', bygroups(Keyword, Text, Name.Variable, Text, Punctuation), 'operator'), (r'(declare|define)(\s+)(function)', bygroups(Keyword.Declaration, Text, Keyword.Declaration)), (r'(\{|\[)', pushstate_operator_root_callback), (r'(unordered|ordered)(\s*)(\{)', pushstate_operator_order_callback), (r'(map|array)(\s*)(\{)', pushstate_operator_map_callback), (r'(declare)(\s+)(ordering)', bygroups(Keyword.Declaration, Text, Keyword.Declaration), 'declareordering'), (r'(xquery)(\s+)(version)', bygroups(Keyword.Pseudo, Text, Keyword.Pseudo), 'xqueryversion'), (r'(\(#)(\s*)', bygroups(Punctuation, Text), 'pragma'), # sometimes return can occur in root state (r'return', Keyword), (r'(declare)(\s+)(option)', bygroups(Keyword.Declaration, Text, Keyword.Declaration), 'option'), # URI LITERALS - single and double quoted (r'(at)(\s+)('+stringdouble+')', String.Double, 'namespacedecl'), (r'(at)(\s+)('+stringsingle+')', String.Single, 'namespacedecl'), (r'(ancestor-or-self|ancestor|attribute|child|descendant-or-self)(::)', bygroups(Keyword, Punctuation)), (r'(descendant|following-sibling|following|parent|preceding-sibling' r'|preceding|self)(::)', bygroups(Keyword, Punctuation)), (r'(if)(\s*)(\()', bygroups(Keyword, Text, Punctuation)), (r'then|else', Keyword), # eXist specific XQUF (r'(update)(\s*)(insert|delete|replace|value|rename)', bygroups(Keyword, Text, Keyword)), (r'(into|following|preceding|with)', Keyword), # Marklogic specific (r'(try)(\s*)', bygroups(Keyword, Text), 'root'), (r'(catch)(\s*)(\()(\$)', bygroups(Keyword, Text, Punctuation, Name.Variable), 'varname'), (r'(@'+qname+')', Name.Attribute, 'operator'), (r'(@'+ncname+')', Name.Attribute, 'operator'), (r'@\*:'+ncname, Name.Attribute, 'operator'), (r'@\*', Name.Attribute, 'operator'), (r'(@)', Name.Attribute, 'operator'), (r'//|/|\+|-|;|,|\(|\)', Punctuation), # STANDALONE QNAMES (qname + r'(?=\s*\{)', Name.Tag, 'qname_braren'), (qname + r'(?=\s*\([^:])', Name.Function, 'qname_braren'), (r'(' + qname + ')(#)([0-9]+)', bygroups(Name.Function, Keyword.Type, Number.Integer)), (qname, Name.Tag, 'operator'), ] }
:license: BSD, see LICENSE for details. """ import re from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \ words, include from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic from pygments.util import shebang_matches, unirange __all__ = ['JuliaLexer', 'JuliaConsoleLexer'] allowed_variable = ( u'(?:[a-zA-Z_\u00A1-\uffff]|%s)(?:[a-zA-Z_0-9\u00A1-\uffff]|%s)*!*' % ((unirange(0x10000, 0x10ffff), ) * 2)) class JuliaLexer(RegexLexer): """ For `Julia <http://julialang.org/>`_ source code. .. versionadded:: 1.6 """ name = 'Julia' aliases = ['julia', 'jl'] filenames = ['*.jl'] mimetypes = ['text/x-julia', 'application/x-julia'] flags = re.MULTILINE | re.UNICODE
:license: BSD, see LICENSE for details. """ import re from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \ words, include from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic from pygments.util import shebang_matches, unirange __all__ = ['JuliaLexer', 'JuliaConsoleLexer'] allowed_variable = ( u'(?:[a-zA-Z_\u00A1-\uffff]|%s)(?:[a-zA-Z_0-9\u00A1-\uffff]|%s)*!*' % ((unirange(0x10000, 0x10ffff),) * 2)) class JuliaLexer(RegexLexer): """ For `Julia <http://julialang.org/>`_ source code. .. versionadded:: 1.6 """ name = 'Julia' aliases = ['julia', 'jl'] filenames = ['*.jl'] mimetypes = ['text/x-julia', 'application/x-julia'] flags = re.MULTILINE | re.UNICODE
class JuliaLexer(RegexLexer): """ For `Julia <http://julialang.org/>`_ source code. .. versionadded:: 1.6 """ name = 'Julia' aliases = ['julia', 'jl'] filenames = ['*.jl'] mimetypes = ['text/x-julia', 'application/x-julia'] flags = re.MULTILINE | re.UNICODE builtins = [ 'exit', 'whos', 'edit', 'load', 'is', 'isa', 'isequal', 'typeof', 'tuple', 'ntuple', 'uid', 'hash', 'finalizer', 'convert', 'promote', 'subtype', 'typemin', 'typemax', 'realmin', 'realmax', 'sizeof', 'eps', 'promote_type', 'method_exists', 'applicable', 'invoke', 'dlopen', 'dlsym', 'system', 'error', 'throw', 'assert', 'new', 'Inf', 'Nan', 'pi', 'im', ] tokens = { 'root': [ (r'\n', Text), (r'[^\S\n]+', Text), (r'#=', Comment.Multiline, "blockcomment"), (r'#.*$', Comment), (r'[]{}:(),;[@]', Punctuation), (r'\\\n', Text), (r'\\', Text), # keywords (r'(begin|while|for|in|return|break|continue|' r'macro|quote|let|if|elseif|else|try|catch|end|' r'bitstype|ccall|do|using|module|import|export|' r'importall|baremodule|immutable)\b', Keyword), (r'(local|global|const)\b', Keyword.Declaration), (r'(Bool|Int|Int8|Int16|Int32|Int64|Uint|Uint8|Uint16|Uint32|Uint64' r'|Float32|Float64|Complex64|Complex128|Any|Nothing|None)\b', Keyword.Type), # functions (r'(function)((?:\s|\\\s)+)', bygroups(Keyword, Name.Function), 'funcname'), # types (r'(type|typealias|abstract|immutable)((?:\s|\\\s)+)', bygroups(Keyword, Name.Class), 'typename'), # operators (r'==|!=|<=|>=|->|&&|\|\||::|<:|[-~+/*%=<>&^|.?!$]', Operator), (r'\.\*|\.\^|\.\\|\.\/|\\', Operator), # builtins ('(' + '|'.join(builtins) + r')\b', Name.Builtin), # backticks (r'`(?s).*?`', String.Backtick), # chars (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,3}|\\u[a-fA-F0-9]{1,4}|" r"\\U[a-fA-F0-9]{1,6}|[^\\\'\n])'", String.Char), # try to match trailing transpose (r'(?<=[.\w)\]])\'+', Operator), # strings (r'(?:[IL])"', String, 'string'), (r'[E]?"', String, combined('stringescape', 'string')), # names (r'@[\w.]+', Name.Decorator), (u'(?:[a-zA-Z_\u00A1-\uffff]|%s)(?:[a-zA-Z_0-9\u00A1-\uffff]|%s)*!*' % ((unirange(0x10000, 0x10ffff),)*2), Name), # numbers (r'(\d+(_\d+)+\.\d*|\d*\.\d+(_\d+)+)([eEf][+-]?[0-9]+)?', Number.Float), (r'(\d+\.\d*|\d*\.\d+)([eEf][+-]?[0-9]+)?', Number.Float), (r'\d+(_\d+)+[eEf][+-]?[0-9]+', Number.Float), (r'\d+[eEf][+-]?[0-9]+', Number.Float), (r'0b[01]+(_[01]+)+', Number.Bin), (r'0b[01]+', Number.Bin), (r'0o[0-7]+(_[0-7]+)+', Number.Oct), (r'0o[0-7]+', Number.Oct), (r'0x[a-fA-F0-9]+(_[a-fA-F0-9]+)+', Number.Hex), (r'0x[a-fA-F0-9]+', Number.Hex), (r'\d+(_\d+)+', Number.Integer), (r'\d+', Number.Integer) ], 'funcname': [ ('[a-zA-Z_]\w*', Name.Function, '#pop'), ('\([^\s\w{]{1,2}\)', Operator, '#pop'), ('[^\s\w{]{1,2}', Operator, '#pop'), ], 'typename': [ ('[a-zA-Z_]\w*', Name.Class, '#pop') ], 'stringescape': [ (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|' r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) ], "blockcomment": [ (r'[^=#]', Comment.Multiline), (r'#=', Comment.Multiline, '#push'), (r'=#', Comment.Multiline, '#pop'), (r'[=#]', Comment.Multiline), ], 'string': [ (r'"', String, '#pop'), (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings # Interpolation is defined as "$" followed by the shortest full # expression, which is something we can't parse. # Include the most common cases here: $word, and $(paren'd expr). (r'\$[a-zA-Z_]+', String.Interpol), (r'\$\(', String.Interpol, 'in-intp'), # @printf and @sprintf formats (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), (r'[^$%"\\]+', String), # unhandled special signs (r'[$%"\\]', String), ], 'in-intp': [ (r'[^()]+', String.Interpol), (r'\(', String.Interpol, '#push'), (r'\)', String.Interpol, '#pop'), ] } def analyse_text(text): return shebang_matches(text, r'julia')