def getToken(self):
    """Build and return the parser expression for a complete table.

    The returned expression carries the results name ``table`` and runs
    ``self.__convertTable`` as its parse action.  Rows and cells run
    ``self.__convertTableRow`` and ``self.__convertTableCell`` respectively.
    """
    # A cell is the shortest stretch of text that ends in "||".  "." does not
    # match a newline, so a line break is only accepted inside a cell when it
    # is preceded by a backslash.  The cell text is captured as group "text".
    cell = Regex(r"(?P<text>(.|(\\\n))*?)\|\|")
    cell.setParseAction(self.__convertTableCell)

    # A row: opening "||", one or more cells (no whitespace skipping between
    # them), and an optional line end.
    # NOTE(review): AtLineStart is defined elsewhere; presumably it anchors
    # the wrapped expression to the start of a line - confirm.
    row = AtLineStart(
        Literal("||") + OneOrMore(cell.leaveWhitespace()) + Optional(LineEnd())
    )
    row.setParseAction(self.__convertTableRow)

    # The table opens with a "||" line that may carry parameters (group
    # "params"), followed by at least one row.
    header = Regex(r"\|\| *(?P<params>.+)?")
    whole_table = AtLineStart(header + LineEnd() + OneOrMore(row))
    return whole_table.setParseAction(self.__convertTable)("table")
# Punctuation literals; Suppress keeps them out of the parse results.
RPAREN, QUOTE, COMMA, AT, EQUALS, HASH = map(Suppress, ')",@=#')


def bracketed(expr):
    """Build a matcher for `expr` wrapped in parentheses or in curly braces."""
    in_parens = LPAREN + expr + RPAREN
    in_curlies = LCURLY + expr + RCURLY
    return in_parens | in_curlies


# Parser components for strings (the hard bit).
# Runs of characters containing no curly brace, and runs containing neither a
# curly brace nor a double quote.  Whitespace skipping is switched off on both.
chars_no_curly = Regex(r"[^{}]+").leaveWhitespace()
chars_no_quotecurly = Regex(r'[^"{}]+').leaveWhitespace()

# A curly string is a brace-delimited sequence whose items are either
# brace-free runs or nested curly strings (each nested one wrapped in a
# Group).  It is a Forward because its definition refers to itself.
curly_string = Forward()
curly_item = Group(curly_string) | chars_no_curly
curly_string << (LCURLY + ZeroOrMore(curly_item) + RCURLY)

# A quoted string is a quote-delimited sequence of quote-and-brace-free runs
# and nested curly strings.
quoted_item = Group(curly_string) | chars_no_quotecurly
quoted_string = QUOTE + ZeroOrMore(quoted_item) + QUOTE

# Numbers are plain runs of digits; integers only.
number = Regex('[0-9]+')

# Basis characters (by exclusion) for variable / field names. The following
return self.name == other.name # Character literals LCURLY, RCURLY, LPAREN, RPAREN, QUOTE, COMMA, AT, EQUALS, HASH = map( Suppress, '{}()",@=#') def bracketed(expr): """ Return matcher for `expr` between curly brackets or parentheses """ return (LPAREN + expr + RPAREN) | (LCURLY + expr + RCURLY) # Define parser components for strings (the hard bit) chars_no_curly = Regex(r"[^{}]+") chars_no_curly.leaveWhitespace() chars_no_quotecurly = Regex(r'[^"{}]+') chars_no_quotecurly.leaveWhitespace() # Curly string is some stuff without curlies, or nested curly sequences curly_string = Forward() curly_item = Group(curly_string) | chars_no_curly curly_string << LCURLY + ZeroOrMore(curly_item) + RCURLY # quoted string is either just stuff within quotes, or stuff within quotes, within # which there is nested curliness quoted_item = Group(curly_string) | chars_no_quotecurly quoted_string = QUOTE + ZeroOrMore(quoted_item) + QUOTE # Numbers can just be numbers. Only integers though. number = Regex("[0-9]+") # Basis characters (by exclusion) for variable / field names. The following
u"""([%(PN_CHARS_U)s:0-9]|%(PLX)s) (([%(PN_CHARS)s\\.:]|%(PLX)s)* ([%(PN_CHARS)s:]|%(PLX)s) )?""" % dict(PN_CHARS_U=PN_CHARS_U_re, PN_CHARS=PN_CHARS_re, PLX=PLX_re), flags=re.X | re.UNICODE) def _hexExpand(match): return unichr(int(match.group(0)[1:], 16)) PN_LOCAL.setParseAction( lambda x: re.sub("(%s)" % PERCENT_re, _hexExpand, x[0])) # [141] PNAME_LN ::= PNAME_NS PN_LOCAL PNAME_LN = PNAME_NS + Param('localname', PN_LOCAL.leaveWhitespace()) # [142] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)? BLANK_NODE_LABEL = Regex(u'_:[0-9%s](?:[\\.%s]*[%s])?' % (PN_CHARS_U_re, PN_CHARS_re, PN_CHARS_re), flags=re.U) BLANK_NODE_LABEL.setParseAction(lambda x: rdflib.BNode(x[0])) # [166] VARNAME ::= ( PN_CHARS_U | [0-9] ) ( PN_CHARS_U | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] )* VARNAME = Regex(u'[%s0-9][%s0-9\u00B7\u0300-\u036F\u203F-\u2040]*' % (PN_CHARS_U_re, PN_CHARS_U_re), flags=re.U) # [143] VAR1 ::= '?' VARNAME VAR1 = Combine(Suppress('?') + VARNAME)
from bibtexvcs.bibfile import (Entry, Comment, ImplicitComment,
                               MacroReference, MacroDefinition, Name,
                               Preamble)

# Punctuation literals; Suppress keeps them out of the parse results.
LCURLY, RCURLY, QUOTE, COMMA, AT, EQUALS, HASH = map(Suppress, '{}",@=#')


def bracketed(expr):
    """Return a matcher for `expr` enclosed in curly braces."""
    return LCURLY + expr + RCURLY


# Parser components for strings (the hard bit).
# Runs of characters containing no curly brace, and runs containing neither a
# curly brace nor a double quote.  Whitespace skipping is switched off on both.
charsNoCurly = Regex(r"[^{}]+")
charsNoCurly.leaveWhitespace()
charsNoQuotecurly = Regex(r'[^"{}]+')
charsNoQuotecurly.leaveWhitespace()

# A curly string is a brace-delimited sequence whose items are either
# brace-free runs or nested curly strings (each nested one wrapped in a
# Group).  It is a Forward because its definition refers to itself, and it
# also has whitespace skipping switched off.
curlyString = Forward().leaveWhitespace()
curlyItem = Group(curlyString) | charsNoCurly
curlyString <<= LCURLY + ZeroOrMore(curlyItem) + RCURLY

# A quoted string is a quote-delimited sequence of quote-and-brace-free runs
# and nested curly strings.
quotedItem = Group(curlyString) | charsNoQuotecurly
quotedString = QUOTE + ZeroOrMore(quotedItem) + QUOTE

# Numbers are plain runs of digits.
number = Regex("[0-9]+")

# Basis characters (by exclusion) for variable / field names. The following
# Local part of a prefixed name.  The pattern is compiled with re.X, so the
# blanks inside the pattern text are layout only and are not matched.
PN_LOCAL = Regex(
    u"""([%(PN_CHARS_U)s:0-9]|%(PLX)s) (([%(PN_CHARS)s\\.:]|%(PLX)s)* ([%(PN_CHARS)s:]|%(PLX)s) )?""" % {
        'PN_CHARS_U': PN_CHARS_U_re,
        'PN_CHARS': PN_CHARS_re,
        'PLX': PLX_re,
    },
    flags=re.X | re.UNICODE)


def _hexExpand(match):
    """Return the character whose code point is the hex number in `match`.

    The first character of the matched text is dropped and the remainder is
    parsed as a base-16 integer.
    """
    hex_digits = match.group(0)[1:]
    return unichr(int(hex_digits, 16))


# Replace every PERCENT_re match in the parsed local name with the character
# it encodes.
PN_LOCAL.setParseAction(
    lambda toks: re.sub("(%s)" % PERCENT_re, _hexExpand, toks[0]))

# [141] PNAME_LN ::= PNAME_NS PN_LOCAL
# No whitespace is skipped between the namespace part and the local part.
PNAME_LN = PNAME_NS + Param('localname', PN_LOCAL.leaveWhitespace())

# [142] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
# The matched label is turned into an rdflib.BNode.
BLANK_NODE_LABEL = Regex(
    u'_:[0-9%s](?:[\\.%s]*[%s])?' % (PN_CHARS_U_re, PN_CHARS_re, PN_CHARS_re),
    flags=re.U,
).setParseAction(lambda toks: rdflib.BNode(toks[0]))

# [166] VARNAME ::= ( PN_CHARS_U | [0-9] ) ( PN_CHARS_U | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] )*
VARNAME = Regex(
    u'[%s0-9][%s0-9\u00B7\u0300-\u036F\u203F-\u2040]*' % (
        PN_CHARS_U_re, PN_CHARS_U_re),
    flags=re.U)

# [143] VAR1 ::= '?' VARNAME
# The leading '?' is suppressed, so only the variable name is kept.
VAR1 = Combine(Suppress('?') + VARNAME)

# [144] VAR2 ::= '$' VARNAME
def getToken(self):
    """Return the parser element that matches a run of plain text.

    The pattern accepts one or more units, each being either a single word
    character or a word character, a hyphen and another word character.
    The match is exposed under the results name ``text`` and whitespace is
    not skipped before it.
    """
    text_token = Regex(r'(?:(?:\w-\w)|\w)+').setResultsName('text')
    text_token.leaveWhitespace()
    return text_token
GreaterThanCondition, GreaterThanOrEqualCondition, LessThanCondition, LessThanOrEqualCondition, RegexCondition, RegexNegatedCondition) end_of_line = Regex(r' *\n') ^ LineEnd() settings_table = Literal('*** Settings ***') + Regex(r'[^\*]+(?=\*)') settings_table.setParseAction(lambda t: '\n'.join(t)) variables_table = Literal('*** Variables ***') + Regex(r'[^\*]+(?=\*)') variables_table.setParseAction(lambda t: '\n'.join(t)) keywords_table = Literal('*** Keywords ***') + CharsNotIn('') + StringEnd() keywords_table.setParseAction(lambda t: '\n'.join(t)) state_name = Regex(r'\w+( \w+)*') state_name.leaveWhitespace() state_name = state_name.setResultsName('state_name') robo_step = Regex(r'([\w\$\{\}][ \w\$\{\}]*[\w\}]|\w)') robo_step.leaveWhitespace() robo_step = robo_step.setResultsName('robo_step') variable = Regex(Variable.REGEX) variable_value = Regex(r'[\w\$\{\}!?\-\=\_\.\/]+( [\w\$\{\}!?\-\=\_\.\/]+)*') splitter = Literal(' ') + OneOrMore(' ') splitter.setParseAction(lambda t: ' ') variable_values = (variable_value + ZeroOrMore(splitter + variable_value)).setResultsName('variable_values') variable_values.setParseAction(lambda t: [[t[2 * i] for i in range(int((len(t) + 1) / 2))]])
def getToken(self):
    """Return the parser element that matches a run of plain text.

    The pattern is compiled with ``re.UNICODE``.  It accepts one or more
    units, each being either a single word character other than ``_`` or
    two such characters joined by a hyphen.  The match is exposed under the
    results name ``text`` and whitespace is not skipped before it.
    """
    # [^\W_] means "a word character that is not an underscore".
    pattern = r'(?:(?:[^\W_]-[^\W_])|[^\W_])+'
    word = Regex(pattern, re.UNICODE).setResultsName('text')
    word.leaveWhitespace()
    return word