# Character constructor usable on both Python 2 (unichr) and Python 3 (chr).
try:
    _unichr = unichr
except NameError:  # Python 3 has no unichr; chr already returns a text character
    _unichr = chr

# Replace a matched '%XX' token by the character whose code point is hex XX.
PERCENT.setParseAction(lambda x: _unichr(int(x[0][1:], 16)))

# [170] PLX ::= PERCENT | PN_LOCAL_ESC
PLX = PERCENT | PN_LOCAL_ESC

# [169] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX ) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX) )?
# Combine() glues the matched pieces back into a single token.
PN_LOCAL = Combine(
    (Regex(u"[%s0-9:]" % PN_CHARS_U_re, flags=re.U) | PLX)
    + ZeroOrMore(
        (Regex(u"[%s\\.:]" % PN_CHARS_re, flags=re.U) | PLX)
        + Optional(Regex(u"[%s:]" % PN_CHARS_re, flags=re.U) | PLX)
    )
)

# [141] PNAME_LN ::= PNAME_NS PN_LOCAL
# The local part must follow the prefix directly, hence leaveWhitespace().
PNAME_LN = PNAME_NS + Param("localname", PN_LOCAL.leaveWhitespace())

# [142] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
# Written as u"...\\." instead of the Python-2-only ur"...\." literal; the
# resulting pattern text is identical, but it also parses on Python 3.
BLANK_NODE_LABEL = Regex(
    u"_:[0-9%s](?:[\\.%s]*[%s])?" % (PN_CHARS_U_re, PN_CHARS_re, PN_CHARS_re),
    flags=re.U,
)
BLANK_NODE_LABEL.setParseAction(lambda x: rdflib.BNode(x[0]))

# [166] VARNAME ::= ( PN_CHARS_U | [0-9] ) ( PN_CHARS_U | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] )*
VARNAME = Regex(
    u"[%s0-9][%s0-9\u00B7\u0300-\u036F\u203F-\u2040]*" % (PN_CHARS_U_re, PN_CHARS_U_re),
    flags=re.U,
)

# [143] VAR1 ::= '?' VARNAME
# The sigil is suppressed so only the bare variable name remains in the token.
VAR1 = Combine(Suppress("?") + VARNAME)

# [144] VAR2 ::= '$' VARNAME
VAR2 = Combine(Suppress("$") + VARNAME)
# HEX = Regex('[0-9A-Fa-f]') # not needed

# [171] PERCENT ::= '%' HEX HEX
PERCENT = Regex('%[0-9a-fA-F]{2}')


def _percent_to_char(tokens):
    """Turn a matched '%XX' token into the character with hex code point XX."""
    hex_digits = tokens[0][1:]
    return chr(int(hex_digits, 16))


PERCENT.setParseAction(_percent_to_char)

# [170] PLX ::= PERCENT | PN_LOCAL_ESC
PLX = PERCENT | PN_LOCAL_ESC

# [169] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX ) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX) )?
# First character, then any number of (middle character, optional follow-up)
# pairs; Combine() merges all pieces into one token.
_pn_local_first = Regex('[%s0-9:]' % PN_CHARS_U_re, flags=re.U) | PLX
_pn_local_rest = ZeroOrMore(
    (Regex('[%s\\.:]' % PN_CHARS_re, flags=re.U) | PLX)
    + Optional(Regex('[%s:]' % PN_CHARS_re, flags=re.U) | PLX)
)
PN_LOCAL = Combine(_pn_local_first + _pn_local_rest)

# [141] PNAME_LN ::= PNAME_NS PN_LOCAL
PNAME_LN = PNAME_NS + Param('localname', PN_LOCAL.leaveWhitespace())

# [142] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
_bnode_pattern = r'_:[0-9%s](?:[\.%s]*[%s])?' % (
    PN_CHARS_U_re,
    PN_CHARS_re,
    PN_CHARS_re,
)
BLANK_NODE_LABEL = Regex(_bnode_pattern, flags=re.U)
BLANK_NODE_LABEL.setParseAction(lambda x: rdflib.BNode(x[0]))

# [166] VARNAME ::= ( PN_CHARS_U | [0-9] ) ( PN_CHARS_U | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] )*
_varname_pattern = '[%s0-9][%s0-9\u00B7\u0300-\u036F\u203F-\u2040]*' % (
    PN_CHARS_U_re,
    PN_CHARS_U_re,
)
VARNAME = Regex(_varname_pattern, flags=re.U)

# [143] VAR1 ::= '?' VARNAME
# The '?' is matched but dropped from the resulting token.
VAR1 = Combine(Suppress('?') + VARNAME)

# [144] VAR2 ::= '$' VARNAME
# [137] PrefixedName ::= PNAME_LN | PNAME_NS
PrefixedName = Comp('pname', PNAME_LN | PNAME_NS)

# [136] iri ::= IRIREF | PrefixedName
iri = IRIREF | PrefixedName

# [135] String ::= STRING_LITERAL1 | STRING_LITERAL2 | STRING_LITERAL_LONG1 | STRING_LITERAL_LONG2
# The alternatives are listed with the LONG forms first, unlike the grammar rule.
String = (
    STRING_LITERAL_LONG1
    | STRING_LITERAL_LONG2
    | STRING_LITERAL1
    | STRING_LITERAL2
)

# [129] RDFLiteral ::= String ( LANGTAG | ( '^^' iri ) )?
# A string optionally followed, with no whitespace in between, by either a
# language tag or a '^^' datatype IRI.
_literal_lang = Param('lang', LANGTAG.leaveWhitespace())
_literal_datatype = (
    Literal('^^').leaveWhitespace() + Param('datatype', iri).leaveWhitespace()
)
RDFLiteral = Comp(
    'literal',
    Param('string', String) + Optional(_literal_lang | _literal_datatype),
)

# [132] NumericLiteralPositive ::= INTEGER_POSITIVE | DECIMAL_POSITIVE | DOUBLE_POSITIVE
NumericLiteralPositive = (
    DOUBLE_POSITIVE | DECIMAL_POSITIVE | INTEGER_POSITIVE
)

# [133] NumericLiteralNegative ::= INTEGER_NEGATIVE | DECIMAL_NEGATIVE | DOUBLE_NEGATIVE
NumericLiteralNegative = (
    DOUBLE_NEGATIVE | DECIMAL_NEGATIVE | INTEGER_NEGATIVE
)

# [131] NumericLiteralUnsigned ::= INTEGER | DECIMAL | DOUBLE
NumericLiteralUnsigned = DOUBLE | DECIMAL | INTEGER

# [130] NumericLiteral ::= NumericLiteralUnsigned | NumericLiteralPositive | NumericLiteralNegative
NumericLiteral = (
    NumericLiteralUnsigned | NumericLiteralPositive | NumericLiteralNegative
)
# One or more RTF control words or brackets.
RTFCODE = OneOrMore(RTF_CTRL | BRCKT)

# Handle "{\*\htmltag4 \par }": the HTML control prefix is dropped and the
# "\par" keyword is replaced by a newline character.
_par_as_newline = Literal("\\par").setParseAction(replaceWith("\n"))
HTM_CTRL_NEWLINE = HTM_CTRL.suppress() + _par_as_newline
# NOTE(review): the return value of suppress() is discarded here, so this
# statement appears to have no effect on HTM_CTRL_NEWLINE - confirm intent.
HTM_CTRL_NEWLINE.suppress()

# Handle "{\*\htmltag84 }": control prefix followed only by spaces.
HTM_CTRL_EMPTY = HTM_CTRL.suppress() + Word(" ").leaveWhitespace()

# Runs of characters drawn from htmchars.
HTM_TXT = OneOrMore(Word(htmchars))

# Control prefix, an optional (suppressed) BRCKT_R, then text.
HTM_CTRL_CONTENT = HTM_CTRL.suppress() + Optional(BRCKT_R).suppress() + HTM_TXT

# Both opening and closing tags and their contents
HTM_TAG = Combine(Literal("<") + Word(htmchars) + Literal(">"))
HTM_TAG.leaveWhitespace()
HTM_TAG.setName("HtmlTag")

#HTM_TAG_EMPTYCONTENT = Word(" ") + BRCKT_R.suppress()
_tag_trailing_text = Optional(BRCKT_R.suppress() + HTM_TXT)
HTM_TAG_PLUS_CONTENT = HTM_TAG + _tag_trailing_text
HTM_TAG_PLUS_CONTENT.leaveWhitespace()

# Text content inside HTML: introduced by the "\htmlrtf0 " marker, which is
# matched but not kept in the output.
HTM_CONTENT_IND = Suppress("\\htmlrtf0 ")
HTM_CONTENT = HTM_CONTENT_IND + OneOrMore(Word(htmchars))
HTM_CONTENT.setName("Html content")
HTM_CONTENT.leaveWhitespace()

# HYPERLINK "<target>": the keyword and both quotes are dropped; the target is
# a word over htmchars with the double quote removed from the allowed set.
_link_chars = htmchars.replace('"','')
RTFLINK = Suppress("HYPERLINK \"") + Word(_link_chars) + Literal('"').suppress()
# A variable is either the '?name' or the '$name' form; the matched name is
# wrapped in an rdflib Variable term.
Var = VAR1 | VAR2
Var.setParseAction(lambda x: rdflib.term.Variable(x[0]))

# [137] PrefixedName ::= PNAME_LN | PNAME_NS
PrefixedName = Comp('pname', PNAME_LN | PNAME_NS)

# [136] iri ::= IRIREF | PrefixedName
iri = IRIREF | PrefixedName

# [135] String ::= STRING_LITERAL1 | STRING_LITERAL2 | STRING_LITERAL_LONG1 | STRING_LITERAL_LONG2
# The alternatives are listed with the LONG forms first, unlike the grammar rule.
String = (
    STRING_LITERAL_LONG1
    | STRING_LITERAL_LONG2
    | STRING_LITERAL1
    | STRING_LITERAL2
)

# [129] RDFLiteral ::= String ( LANGTAG | ( '^^' iri ) )?
# The optional suffix (language tag or '^^' datatype) must follow the string
# without intervening whitespace.
_literal_lang = Param('lang', LANGTAG.leaveWhitespace())
_literal_datatype = (
    Literal('^^').leaveWhitespace() + Param('datatype', iri).leaveWhitespace()
)
RDFLiteral = Comp(
    'literal',
    Param('string', String) + Optional(_literal_lang | _literal_datatype),
)

# [132] NumericLiteralPositive ::= INTEGER_POSITIVE | DECIMAL_POSITIVE | DOUBLE_POSITIVE
NumericLiteralPositive = (
    DOUBLE_POSITIVE | DECIMAL_POSITIVE | INTEGER_POSITIVE
)

# [133] NumericLiteralNegative ::= INTEGER_NEGATIVE | DECIMAL_NEGATIVE | DOUBLE_NEGATIVE
NumericLiteralNegative = (
    DOUBLE_NEGATIVE | DECIMAL_NEGATIVE | INTEGER_NEGATIVE
)

# [131] NumericLiteralUnsigned ::= INTEGER | DECIMAL | DOUBLE
NumericLiteralUnsigned = DOUBLE | DECIMAL | INTEGER

# [130] NumericLiteral ::= NumericLiteralUnsigned | NumericLiteralPositive | NumericLiteralNegative
NumericLiteral = (
    NumericLiteralUnsigned | NumericLiteralPositive | NumericLiteralNegative
)

# [134] BooleanLiteral ::= 'true' | 'false'
# NOTE(review): the BooleanLiteral assignment is cut off in this view (it ends
# on a line continuation), so it is kept as a comment instead of being guessed:
# BooleanLiteral = Keyword('true').setParseAction(lambda: rdflib.Literal(True)) |\ ...
# HEX = Regex('[0-9A-Fa-f]') # not needed

# Character constructor usable on both Python 2 (unichr) and Python 3 (chr).
try:
    _unichr = unichr
except NameError:  # Python 3 has no unichr; chr already returns a text character
    _unichr = chr

# [171] PERCENT ::= '%' HEX HEX
PERCENT = Regex('%[0-9a-fA-F]{2}')
# Replace a matched '%XX' token by the character whose code point is hex XX.
PERCENT.setParseAction(lambda x: _unichr(int(x[0][1:], 16)))

# [170] PLX ::= PERCENT | PN_LOCAL_ESC
PLX = PERCENT | PN_LOCAL_ESC

# [169] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX ) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX) )?
# Combine() glues the matched pieces back into a single token.
PN_LOCAL = Combine(
    (Regex(u'[%s0-9:]' % PN_CHARS_U_re, flags=re.U) | PLX)
    + ZeroOrMore(
        (Regex(u'[%s\\.:]' % PN_CHARS_re, flags=re.U) | PLX)
        + Optional(Regex(u'[%s:]' % PN_CHARS_re, flags=re.U) | PLX)
    )
)

# [141] PNAME_LN ::= PNAME_NS PN_LOCAL
# The local part must follow the prefix directly, hence leaveWhitespace().
PNAME_LN = PNAME_NS + Param('localname', PN_LOCAL.leaveWhitespace())

# [142] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
# Written as u'...\\.' instead of the Python-2-only ur'...\.' literal; the
# resulting pattern text is identical, but it also parses on Python 3.
BLANK_NODE_LABEL = Regex(
    u'_:[0-9%s](?:[\\.%s]*[%s])?' % (PN_CHARS_U_re, PN_CHARS_re, PN_CHARS_re),
    flags=re.U,
)
BLANK_NODE_LABEL.setParseAction(lambda x: rdflib.BNode(x[0]))

# [166] VARNAME ::= ( PN_CHARS_U | [0-9] ) ( PN_CHARS_U | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] )*
VARNAME = Regex(
    u'[%s0-9][%s0-9\u00B7\u0300-\u036F\u203F-\u2040]*' % (PN_CHARS_U_re, PN_CHARS_U_re),
    flags=re.U,
)

# [143] VAR1 ::= '?' VARNAME
# The sigil is suppressed so only the bare variable name remains in the token.
VAR1 = Combine(Suppress('?') + VARNAME)

# [144] VAR2 ::= '$' VARNAME
# logical-line = LINE-START *extended-line LINE-END
# NOTE: according to tests with MS Office 2007, and contrary to MS-VBAL, the
# line continuation pattern requires at least one whitespace before the
# underscore, but not after.
# line_continuation = (White(min=1) + '_' + White(min=0) + line_terminator).leaveWhitespace()

# A run of space, tab or \x19 characters, matched without skipping whitespace.
whitespaces = Word(' \t\x19').leaveWhitespace()

# whitespace(s), underscore, optional whitespace(s), end of line
line_continuation = (
    whitespaces + '_' + Optional(whitespaces) + line_terminator
).leaveWhitespace()
# replace line_continuation by a single space:
line_continuation.setParseAction(replaceWith(' '))

# Any mix of continuations and ordinary characters up to a line terminator,
# merged into one token.
extended_line = Combine(
    ZeroOrMore(line_continuation | non_line_termination_character)
    + line_terminator
)
module_body_logical_structure = ZeroOrMore(extended_line)

# line_terminator is used at the end rather than LineEnd()
logical_line = (
    LineStart()
    + ZeroOrMore(extended_line.leaveWhitespace())
    + line_terminator
)
module_body_lines = Combine(ZeroOrMore(logical_line))  # .setDebug()


# === FUNCTIONS ==============================================================

# NOTE(review): only the signature and docstring of the function below are
# visible in this view; its body is cut off, so it is kept as a comment rather
# than reproduced as code.
# def vba_collapse_long_lines(vba_code):
#     """
#     Parse a VBA module code to detect continuation line characters
#     (underscore) and collapse split lines. Continuation line characters are
#     replaced by spaces.
#
#     @param vba_code (str) The VBA code to modify.
#     @return (str) The given VBA code with long lines collapsed.
#     """
# Character constructor usable on both Python 2 (unichr) and Python 3 (chr).
try:
    _unichr = unichr
except NameError:  # Python 3 has no unichr; chr already returns a text character
    _unichr = chr

# [171] PERCENT ::= '%' HEX HEX
PERCENT = Regex('%[0-9a-fA-F]{2}')
# Replace a matched '%XX' token by the character whose code point is hex XX.
PERCENT.setParseAction(lambda x: _unichr(int(x[0][1:], 16)))

# [170] PLX ::= PERCENT | PN_LOCAL_ESC
PLX = PERCENT | PN_LOCAL_ESC

# [169] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX ) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX) )?
# Combine() glues the matched pieces back into a single token.
PN_LOCAL = Combine(
    (Regex(u'[%s0-9:]' % PN_CHARS_U_re, flags=re.U) | PLX)
    + ZeroOrMore(
        (Regex(u'[%s\\.:]' % PN_CHARS_re, flags=re.U) | PLX)
        + Optional(Regex(u'[%s:]' % PN_CHARS_re, flags=re.U) | PLX)
    )
)

# [141] PNAME_LN ::= PNAME_NS PN_LOCAL
# The local part must follow the prefix directly, hence leaveWhitespace().
PNAME_LN = PNAME_NS + Param('localname', PN_LOCAL.leaveWhitespace())

# [142] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
# Written as u'...\\.' instead of the Python-2-only ur'...\.' literal; the
# resulting pattern text is identical, but it also parses on Python 3.
BLANK_NODE_LABEL = Regex(
    u'_:[0-9%s](?:[\\.%s]*[%s])?' % (PN_CHARS_U_re, PN_CHARS_re, PN_CHARS_re),
    flags=re.U,
)
BLANK_NODE_LABEL.setParseAction(lambda x: rdflib.BNode(x[0]))

# [166] VARNAME ::= ( PN_CHARS_U | [0-9] ) ( PN_CHARS_U | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] )*
VARNAME = Regex(
    u'[%s0-9][%s0-9\u00B7\u0300-\u036F\u203F-\u2040]*' % (PN_CHARS_U_re, PN_CHARS_U_re),
    flags=re.U,
)

# [143] VAR1 ::= '?' VARNAME
# The sigil is suppressed so only the bare variable name remains in the token.
VAR1 = Combine(Suppress('?') + VARNAME)
# [137] PrefixedName ::= PNAME_LN | PNAME_NS
PrefixedName = Comp("pname", PNAME_LN | PNAME_NS)

# [136] iri ::= IRIREF | PrefixedName
iri = IRIREF | PrefixedName

# [135] String ::= STRING_LITERAL1 | STRING_LITERAL2 | STRING_LITERAL_LONG1 | STRING_LITERAL_LONG2
# The alternatives are listed with the LONG forms first, unlike the grammar rule.
String = (
    STRING_LITERAL_LONG1
    | STRING_LITERAL_LONG2
    | STRING_LITERAL1
    | STRING_LITERAL2
)

# [129] RDFLiteral ::= String ( LANGTAG | ( '^^' iri ) )?
# The optional suffix (language tag or '^^' datatype) must follow the string
# without intervening whitespace.
_literal_lang = Param("lang", LANGTAG.leaveWhitespace())
_literal_datatype = (
    Literal("^^").leaveWhitespace() + Param("datatype", iri).leaveWhitespace()
)
RDFLiteral = Comp(
    "literal",
    Param("string", String) + Optional(_literal_lang | _literal_datatype),
)

# [132] NumericLiteralPositive ::= INTEGER_POSITIVE | DECIMAL_POSITIVE | DOUBLE_POSITIVE
NumericLiteralPositive = (
    DOUBLE_POSITIVE | DECIMAL_POSITIVE | INTEGER_POSITIVE
)

# [133] NumericLiteralNegative ::= INTEGER_NEGATIVE | DECIMAL_NEGATIVE | DOUBLE_NEGATIVE
NumericLiteralNegative = (
    DOUBLE_NEGATIVE | DECIMAL_NEGATIVE | INTEGER_NEGATIVE
)

# [131] NumericLiteralUnsigned ::= INTEGER | DECIMAL | DOUBLE
NumericLiteralUnsigned = DOUBLE | DECIMAL | INTEGER

# [130] NumericLiteral ::= NumericLiteralUnsigned | NumericLiteralPositive | NumericLiteralNegative
# NOTE(review): the NumericLiteral assignment is cut off in this view right
# after its opening parenthesis, so it is kept as a comment instead of guessed:
# NumericLiteral = ( ...
# NOTE(review): this chunk opens with the tail of a statement whose beginning
# is outside the visible source; it is kept as a comment, not reconstructed:
#     ... Combine(Optional("?" + Query))

# Request line: method, URI and version separated by SP, ended by CRLF.
# Whitespace is significant, so it is not skipped.
Request_Line = Method + SP + ICAP_URI + SP + ICAP_Version + CRLF
Request_Line.leaveWhitespace()

# TEXT is any octet that is not a control character.
text = set(octet) - set(ctl)
TEXT = oneOf(list(text))
TEXT.leaveWhitespace()

Generic_Field_Content = ZeroOrMore(TEXT)
Generic_Field_Content.leaveWhitespace()

# <the OCTETs making up the field-value and consisting of either *TEXT
#  or combinations of token, separators, and quoted-string>
Generic_Field_Value = Combine(Generic_Field_Content) ^ LWS
Generic_Field_Value.leaveWhitespace()

# Extension headers start with the literal "X-" followed by a token.
Extension_Field_Name = Literal("X-") + Token
Extension_Field_Name.leaveWhitespace()

# Header names accepted here regardless of message kind, plus extensions.
Common_Field_Name = (
    Literal("Cache-Control")
    ^ Literal("Connection")
    ^ Literal("Date")
    ^ Literal("Expires")
    ^ Literal("Pragma")
    ^ Literal("Trailer")
    ^ Literal("Upgrade")
    ^ Literal("Encapsulated")
    ^ Extension_Field_Name
)
Common_Field_Name.leaveWhitespace()

# REQUEST
Request_Field_Name = (
    Literal("Authorization")
    ^ Literal("Allow")
    ^ Literal("From")
    ^ Literal("Host")
    ^ Literal("Referer")
    ^ Literal("User-Agent")
    ^ Literal("Preview")
)
Request_Field_Name.leaveWhitespace()

# A request header name is either request-specific or one of the common names.
Request_Fields = Combine(Request_Field_Name) ^ Combine(Common_Field_Name)
Request_Fields.leaveWhitespace()
# [137] PrefixedName ::= PNAME_LN | PNAME_NS
PrefixedName = Comp("pname", PNAME_LN | PNAME_NS)

# [136] iri ::= IRIREF | PrefixedName
iri = IRIREF | PrefixedName

# [135] String ::= STRING_LITERAL1 | STRING_LITERAL2 | STRING_LITERAL_LONG1 | STRING_LITERAL_LONG2
# The alternatives are listed with the LONG forms first, unlike the grammar rule.
String = (
    STRING_LITERAL_LONG1
    | STRING_LITERAL_LONG2
    | STRING_LITERAL1
    | STRING_LITERAL2
)

# [129] RDFLiteral ::= String ( LANGTAG | ( '^^' iri ) )?
# The optional suffix (language tag or '^^' datatype) must follow the string
# without intervening whitespace.
_literal_lang = Param("lang", LANGTAG.leaveWhitespace())
_literal_datatype = (
    Literal("^^").leaveWhitespace() + Param("datatype", iri).leaveWhitespace()
)
RDFLiteral = Comp(
    "literal",
    Param("string", String) + Optional(_literal_lang | _literal_datatype),
)

# [132] NumericLiteralPositive ::= INTEGER_POSITIVE | DECIMAL_POSITIVE | DOUBLE_POSITIVE
NumericLiteralPositive = (
    DOUBLE_POSITIVE | DECIMAL_POSITIVE | INTEGER_POSITIVE
)

# [133] NumericLiteralNegative ::= INTEGER_NEGATIVE | DECIMAL_NEGATIVE | DOUBLE_NEGATIVE
NumericLiteralNegative = (
    DOUBLE_NEGATIVE | DECIMAL_NEGATIVE | INTEGER_NEGATIVE
)

# [131] NumericLiteralUnsigned ::= INTEGER | DECIMAL | DOUBLE
NumericLiteralUnsigned = DOUBLE | DECIMAL | INTEGER

# [130] NumericLiteral ::= NumericLiteralUnsigned | NumericLiteralPositive | NumericLiteralNegative
NumericLiteral = (
    NumericLiteralUnsigned | NumericLiteralPositive | NumericLiteralNegative
)