def define_number(self):
    """
    Build the grammar element that recognises a number in Arabic Numerals.

    Override this method to support numeral systems other than Arabic
    Numerals (0-9). Do not override it just to change the characters that
    separate thousands and decimals: use :attr:`T_THOUSANDS_SEPARATOR` and
    :attr:`T_DECIMAL_SEPARATOR` for that.

    The sign and decimal-separator tokens configured on the grammar are
    rewritten to ``+``, ``-`` and ``.``; the thousands separator is dropped.
    The combined text is then handed to :meth:`make_number`.

    """
    token = self._grammar.get_token

    # Parse actions replacing the localised token with its canonical form:
    def as_plus(tokens):
        return "+"

    def as_minus(tokens):
        return "-"

    def as_dot(tokens):
        return "."

    # The basic tokens:
    plus = Literal(token("positive_sign")).setParseAction(as_plus)
    minus = Literal(token("negative_sign")).setParseAction(as_minus)
    dot = Literal(token("decimal_separator")).setParseAction(as_dot)
    group_sep = Suppress(token("thousands_separator"))
    plain_digits = Word(nums)

    # The integer part is either grouped in thousands or a plain digit run:
    grouped_digits = (Word(nums, max=3) +
                      OneOrMore(group_sep + Word(nums, exact=3)))
    whole_part = grouped_digits | plain_digits
    fraction_part = dot + plain_digits

    number = Combine(
        Optional(plus | minus) + whole_part + Optional(fraction_part))
    number.setParseAction(self.make_number)
    number.setName("number")
    return number
def define_number(self):
    """
    Return the syntax definition for a number in Arabic Numerals.

    Subclasses supporting another numeral system should override this
    method. To change only the thousands or decimal separator characters,
    set :attr:`T_THOUSANDS_SEPARATOR` and :attr:`T_DECIMAL_SEPARATOR`
    instead of overriding.

    """
    grammar = self._grammar

    # Signs: whatever the grammar's token is, the result carries "+" / "-".
    positive = Literal(grammar.get_token("positive_sign"))
    positive.setParseAction(lambda toks: "+")
    negative = Literal(grammar.get_token("negative_sign"))
    negative.setParseAction(lambda toks: "-")
    sign = positive | negative

    # Separators: the decimal one becomes ".", the thousands one vanishes.
    decimal_mark = Literal(grammar.get_token("decimal_separator"))
    decimal_mark.setParseAction(lambda toks: ".")
    thousands_mark = Suppress(grammar.get_token("thousands_separator"))

    digit_run = Word(nums)
    leading_group = Word(nums, max=3)
    following_groups = OneOrMore(thousands_mark + Word(nums, exact=3))

    # Grouped form is tried first, then the ungrouped digit run:
    integer_part = (leading_group + following_groups) | digit_run
    decimal_part = decimal_mark + digit_run

    number = Combine(Optional(sign) + integer_part + Optional(decimal_part))
    number.setParseAction(self.make_number)
    number.setName("number")
    return number
def define_identifier(self):
    """
    Return the syntax definition for an identifier.

    An identifier is one individual identifier optionally preceded by any
    number of namespace parts, each followed by the grammar's
    ``namespace_separator`` token. Individual identifiers are made of word
    characters plus the grammar's ``identifier_spacing`` token and must not
    start with a Unicode digit (code points below 0x10000 are considered).

    The namespace parts are exposed under the ``namespace_parts`` results
    name and the final part under ``identifier``.

    """
    # --- Defining the individual identifiers:
    # ``unichr``/``xrange`` only exist on Python 2; fall back to the
    # Python 3 equivalents so the grammar can be built on both.
    try:
        to_char, code_points = unichr, xrange(0x10000)
    except NameError:
        to_char, code_points = chr, range(0x10000)
    # Getting all the Unicode numbers in a single string:
    unicode_numbers = "".join(
        [to_char(n) for n in code_points if to_char(n).isdigit()])
    unicode_number_expr = Regex("[%s]" % unicode_numbers, re.UNICODE)
    space_char = re.escape(self._grammar.get_token("identifier_spacing"))
    # Raw string: "\w" is not a valid escape in a normal string literal.
    identifier0 = Regex(r"[\w%s]+" % space_char, re.UNICODE)
    # Identifiers cannot start with a number:
    identifier0 = Combine(~unicode_number_expr + identifier0)
    identifier0.setName("individual_identifier")

    # --- Defining the namespaces:
    namespace_sep = Suppress(self._grammar.get_token("namespace_separator"))
    namespace = Group(ZeroOrMore(identifier0 + namespace_sep))
    namespace.setName("namespace")

    # --- The full identifier, which could have a namespace:
    identifier = Combine(namespace.setResultsName("namespace_parts") +
                         identifier0.setResultsName("identifier"))
    identifier.setName("full_identifier")

    return identifier
def define_identifier(self):
    """
    Return the syntax definition for an identifier.

    The result matches zero or more ``<part><namespace_separator>`` prefixes
    followed by a final part. Each part consists of word characters and the
    grammar's ``identifier_spacing`` token, and is rejected if it begins
    with a Unicode digit. Results names: ``namespace_parts`` for the
    prefixes, ``identifier`` for the final part.

    """
    # --- Defining the individual identifiers:
    # Getting all the Unicode numbers in a single string. The Python 2
    # builtins are tried first; on Python 3 they raise NameError and the
    # equivalent ``chr``/``range`` are used instead.
    try:
        unicode_numbers = "".join(
            [unichr(n) for n in xrange(0x10000) if unichr(n).isdigit()])
    except NameError:
        unicode_numbers = "".join(
            [chr(n) for n in range(0x10000) if chr(n).isdigit()])
    unicode_number_expr = Regex("[%s]" % unicode_numbers, re.UNICODE)

    space_char = re.escape(self._grammar.get_token("identifier_spacing"))
    # The pattern is a raw string so that "\w" reaches the regex engine
    # without relying on an invalid string escape sequence.
    identifier0 = Regex(r"[\w%s]+" % space_char, re.UNICODE)
    # Identifiers cannot start with a number:
    identifier0 = Combine(~unicode_number_expr + identifier0)
    identifier0.setName("individual_identifier")

    # --- Defining the namespaces:
    namespace_sep = Suppress(
        self._grammar.get_token("namespace_separator"))
    namespace = Group(ZeroOrMore(identifier0 + namespace_sep))
    namespace.setName("namespace")

    # --- The full identifier, which could have a namespace:
    identifier = Combine(
        namespace.setResultsName("namespace_parts") +
        identifier0.setResultsName("identifier"))
    identifier.setName("full_identifier")

    return identifier
def _create_parser() -> ParserElement:
    """Build the pyparsing grammar for boolean filter expressions.

    Filter terms (optionally prefixed with ``tag:``, ``ingr:`` or ``unit:``)
    are combined through ``infixNotation`` with not/and/xor/or operators and
    an implicit operator (two adjacent terms with no operator between them).

    Returns:
        The parser element produced by ``infixNotation``.
    """
    # Operators, in the tuple layout that infixNotation expects. The first
    # entry has no operator expression: it handles adjacent terms.
    operators = [
        (None, 2, opAssoc.LEFT, BooleanAndOperation._create_from_implicit_tokens),
        (CaselessKeyword('not') | "~" | "!", 1, opAssoc.RIGHT, BooleanNotOperation._create_from_tokens),
        (CaselessKeyword('and') | "&", 2, opAssoc.LEFT, BooleanAndOperation._create_from_tokens),
        (CaselessKeyword('xor') | "^", 2, opAssoc.LEFT, BooleanXorOperation._create_from_tokens),
        (CaselessKeyword('or') | "|", 2, opAssoc.LEFT, BooleanOrOperation._create_from_tokens),
    ]

    # Terms (atoms) that the boolean operators combine; a prefix of None
    # means the term carries no "<keyword>:" prefix.
    prefixed_terms = [
        (CaselessKeyword('tag'), TagFilterTerm._create_from_tokens),
        (CaselessKeyword('ingr'), IngredientFilterTerm._create_from_tokens),
        (CaselessKeyword('unit'), UnitFilterTerm._create_from_tokens),
        (None, AnyFilterTerm._create_from_tokens),
    ]

    # Operator and prefix expressions that an unquoted string must not match.
    reserved = [entry[0] for entry in operators if entry[0] is not None]
    reserved += [entry[0] for entry in prefixed_terms if entry[0] is not None]

    # A quoted string indicates an exact match.
    double_quoted = QuotedString('"', escChar='\\')
    single_quoted = QuotedString("'", escChar='\\')
    exact_string = (double_quoted | single_quoted).setResultsName('string')
    exact_string.setName("quoted_filter_string")
    exact_string.setParseAction(ExactFilterString._create_from_tokens)

    # An unquoted string is an inexact match; it cannot contain whitespace
    # or parentheses, nor start with a reserved expression.
    not_reserved = ~MatchFirst(reserved)
    bare_word = Regex(r'[^\s\(\)]+', flags=re.U).setResultsName('string')
    fuzzy_string = not_reserved + bare_word
    fuzzy_string.setName("unquoted_filter_string")
    fuzzy_string.setParseAction(FuzzyFilterString._create_from_tokens)

    # Regular expressions are not parsed by the grammar; the /.../ body is
    # handed to the parse action.
    regex_string = QuotedString('/', escChar='\\')
    regex_string.setName("regex_filter_string")
    regex_string.setParseAction(RegexFilterString._create_from_tokens)

    # The unquoted alternative must come last so that leading quotes are
    # handled by the quoted alternatives.
    any_string = regex_string | exact_string | fuzzy_string
    any_string.setParseAction(lambda toks: toks[0])

    def build_term(prefix, action):
        # One alternative of the term grammar, with or without a prefix.
        if prefix is None:
            term = any_string.setResultsName("filter_string")
            term.setName("filter_term_None")
        else:
            term = Combine(prefix + ':' + any_string.setResultsName("filter_string"))
            term.setName("filter_term_" + str(prefix.match))
        term.addParseAction(action)
        return term

    alternatives = [build_term(prefix, action) for prefix, action in prefixed_terms]
    return infixNotation(MatchFirst(alternatives), operators)
def _build_grammar(self):
    """Assemble and return the expression grammar.

    An expression is one of: a parenthesised binary arithmetic operation,
    an assignment (``name = expr``), a tag name, a number, a single-quoted
    string, or ``?``. Parse actions delegate to ``self.to_literal``,
    ``self.to_arith``, ``self.to_assign`` and ``self.to_print_tags``.
    """
    expr = Forward()

    # --- numeric literals (float is tried before int)
    def float_action(tokens):
        return self.to_literal(float(tokens[0]))

    def int_action(tokens):
        return self.to_literal(int(tokens[0]))

    float_lit = Combine(Word(nums) + '.' + Word(nums))
    float_lit.setName('float')
    float_lit.setParseAction(float_action)

    int_lit = Word(nums)
    int_lit.setName('int')
    int_lit.setParseAction(int_action)

    num = float_lit | int_lit
    num.setParseAction(lambda tokens: tokens[0])

    # --- tag references and quoted strings
    tag_name = Word(alphas + "_", alphanums + "_")
    tag_name.setName('tag_name')
    tag_name.setParseAction(
        lambda tokens: tag_reference.TagReference(tokens[0]))

    quoted_string = QuotedString("'")
    quoted_string.setParseAction(lambda tokens: self.to_literal(tokens[0]))

    # --- "(" expr operator expr ")"
    oper = oneOf('+ * / -')
    oper.setParseAction(lambda tokens: tokens[0])

    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()

    def arith_action(tokens):
        # The Group wraps left operand, operator and right operand.
        left, operator, right = tokens[0][0], tokens[0][1], tokens[0][2]
        return self.to_arith(left, operator, right)

    arith = Group(lpar + expr + oper + expr + rpar)
    arith.setParseAction(arith_action)

    # --- assignment: tokens are [name, '=', value]
    assign = tag_name + '=' + expr
    assign.setName('assign')
    assign.setParseAction(
        lambda tokens: self.to_assign(tokens[0], tokens[2]))

    # --- "?"
    print_tags = Literal('?')
    print_tags.setParseAction(lambda tokens: self.to_print_tags())

    expr << (arith | assign | tag_name | num | quoted_string | print_tags)
    expr.setParseAction(lambda tokens: tokens[0])
    return expr
def define_identifier(self):
    """
    Return the syntax definition for an identifier.

    An identifier is an individual identifier (word characters plus the
    grammar's ``identifier_spacing`` token) optionally preceded by namespace
    parts, each followed by the ``namespace_separator`` token. The match is
    rejected when it is immediately followed by one of the arithmetic
    operators ``^ + - * / ! %``.

    Results names: ``namespace_parts`` and ``identifier``.

    """
    # --- Defining the individual identifiers:
    # Getting all the Unicode numbers in a single string (Python 2 builtins
    # first, Python 3 equivalents when those raise NameError):
    try:
        unicode_numbers = "".join(
            [unichr(n) for n in xrange(0x10000) if unichr(n).isdigit()])
    except NameError:
        unicode_numbers = "".join(
            [chr(n) for n in range(0x10000) if chr(n).isdigit()])
    # NOTE(review): this expression is built but not referenced below, so
    # nothing here stops an identifier from starting with a digit. Confirm
    # whether dropping that check was intentional.
    unicode_number_expr = Regex("[%s]" % unicode_numbers, re.UNICODE)
    space_char = re.escape(self._grammar.get_token("identifier_spacing"))
    # Raw string: "\w" is not a valid escape in a normal string literal.
    identifier0 = Regex(r"[\w%s]+" % space_char, re.UNICODE)
    identifier0 = Combine(identifier0)
    identifier0.setName("individual_identifier")

    # --- Defining the namespaces:
    namespace_sep = Suppress(
        self._grammar.get_token("namespace_separator"))
    namespace = Group(ZeroOrMore(identifier0 + namespace_sep))
    namespace.setName("namespace")

    # --- The full identifier, which could have a namespace:
    identifier = Combine(
        namespace.setResultsName("namespace_parts") +
        identifier0.setResultsName("identifier"))
    identifier.setName("full_identifier")

    # --- An identifier must not be directly followed by an operator:
    expop = Literal('^')
    multop = oneOf('* /')
    factop = Literal('!')
    modop = Literal('%')
    signop = oneOf('+ -')
    opers = expop | signop | multop | factop | modop
    identifier = identifier + NotAny(opers)

    return identifier
def define_math(self):
    """
    Return the syntax definition for an arithmetic expression.

    Operands are numbers (optionally grouped in thousands and with a
    decimal part) or variables made of letters and the grammar's
    ``namespace_separator`` token. The whole match is combined into one
    string and passed to :meth:`make_arithmetic`.

    """
    token = self._grammar.get_token

    # --- Operands:
    digits = Word(nums)
    variable = Word(alphas + token("namespace_separator"))

    decimal_sep = Literal(token("decimal_separator"))
    decimal_sep.setParseAction(lambda toks: ".")
    thousands_sep = Suppress(token("thousands_separator"))

    grouped = Word(nums, max=3) + OneOrMore(
        thousands_sep + Word(nums, exact=3))
    integers = grouped | digits
    decimals = decimal_sep + digits
    operand = Combine((integers + Optional(decimals)) | variable)

    # --- Operators:
    expop = Literal('^')
    signop = oneOf('+ -')
    multop = oneOf('* /')
    plusop = oneOf('+ -')
    # Defined but not referenced: the table below uses the bare "!" string.
    factop = Literal('!')
    modop = Literal('%')

    # Precedence levels as listed, first entry first:
    levels = [
        (Literal("["), 1, opAssoc.RIGHT),
        (Literal("]"), 1, opAssoc.LEFT),
        ("!", 1, opAssoc.LEFT),
        (expop, 2, opAssoc.RIGHT),
        (signop, 1, opAssoc.RIGHT),
        (multop, 2, opAssoc.LEFT),
        (modop, 2, opAssoc.LEFT),
        (plusop, 2, opAssoc.LEFT),
    ]

    arithmetic = Combine(operatorPrecedence(operand, levels))
    arithmetic.setParseAction(self.make_arithmetic)
    arithmetic.setName("arithmetic")
    return arithmetic
expr = Forward() # CASE case = (CASE + Group(ZeroOrMore((WHEN + expr("when") + THEN + expr("then")).addParseAction(to_when_call)))("case") + Optional(ELSE+expr("else")) + END).addParseAction(to_case_call) selectStmt = Forward() compound = ( (Literal("-")("op").setDebug(DEBUG) + expr("params")).addParseAction(to_json_call) | (Keyword("not", caseless=True)("op").setDebug(DEBUG) + expr("params")).addParseAction(to_json_call) | (Keyword("distinct", caseless=True)("op").setDebug(DEBUG) + expr("params")).addParseAction(to_json_call) | Keyword("null", caseless=True).setName("null").setDebug(DEBUG) | case | (Literal("(").setDebug(DEBUG).suppress() + selectStmt + Literal(")").suppress()) | (Literal("(").setDebug(DEBUG).suppress() + Group(delimitedList(expr)) + Literal(")").suppress()) | realNum.setName("float").setDebug(DEBUG) | intNum.setName("int").setDebug(DEBUG) | sqlString.setName("string").setDebug(DEBUG) | ( Word(alphas)("op").setName("function name").setDebug(DEBUG) + Literal("(").setName("func_param").setDebug(DEBUG) + Optional(selectStmt | Group(delimitedList(expr)))("params") + ")" ).addParseAction(to_json_call).setDebug(DEBUG) | ident.copy().setName("variable").setDebug(DEBUG) ) expr << Group(infixNotation( compound, [ ( o,
# Printable characters minus "<", ">", "\", "{" and "}", plus space and tab.
htmchars = "".join(c for c in printables if c not in "<>\\{}") + " " + "\t"

SEP = Literal(';')

BRCKT_L = Literal('{')
BRCKT_R = Literal('}')
BRCKT = BRCKT_L | BRCKT_R
BRCKT.setName("Bracket")

# basic RTF control codes, ie. "\labelname3434"
CTRL_LABEL = Combine(Word(alphas + "'") + Optional(Word(nums)))
BASE_CTRL = Combine(Literal('\\') + CTRL_LABEL)

# in some rare cases (color table declarations), control has ' ;' suffix
BASE_CTRL = Combine(BASE_CTRL + SEP) | BASE_CTRL
BASE_CTRL.setName("BaseControl")

#\*\html93394
HTM_CTRL = Combine(Literal('\\*\\') + CTRL_LABEL)
HTM_CTRL.setName("HtmlControl")

RTF_CTRL = BASE_CTRL | HTM_CTRL
RTF_CTRL.setName("Control")

RTFCODE = OneOrMore(RTF_CTRL | BRCKT)

# handle "{\*\htmltag4 \par }""
# The HTML control itself is dropped and "\par" is replaced by a newline.
# A former bare ``HTM_CTRL_NEWLINE.suppress()`` statement was removed here:
# ``suppress()`` returns a new element rather than modifying this one, so
# with its result discarded the call did nothing.
HTM_CTRL_NEWLINE = HTM_CTRL.suppress() + Literal("\\par").setParseAction(replaceWith("\n"))

# handle "{\*\htmltag84 }"
TIMESTAMP = Word(printables) TIMESTAMP = TIMESTAMP.setResultsName("timestamp") TIMESTAMP.setName("Timestamp") HOSTNAME = NIL ^ Word(printables) HOSTNAME = HOSTNAME.setResultsName("hostname") HOSTNAME.setName("Hostname") APPNAME = Word("".join(set(printables) - {"["})) APPNAME = APPNAME.setResultsName("appname") APPNAME.setName("AppName") PROCID = Combine(LBRACKET + Word("".join(set(printables) - {"]"})) + RBRACKET) PROCID = PROCID.setResultsName("procid") PROCID.setName("ProcID") HEADER = PRIORITY + TIMESTAMP + SP + HOSTNAME + SP + APPNAME + PROCID MESSAGE = restOfLine.setResultsName("message") MESSAGE.setName("Message") SYSLOG_MESSAGE = HEADER + COLON + SP + MESSAGE SYSLOG_MESSAGE.leaveWhitespace() @attr.s(slots=True, frozen=True) class SyslogMessage: facility = attr.ib( type=Facility, converter=Facility, validator=attr.validators.in_(Facility)
logging.debug("ASM Macro {} = {}".format(macro, value)) ##### # Generate the assembly parser based on the available opcodes ##### # Hash-style comments comment = Regex('#.*') # Labels at the beginning of the line mark addresses, or can be used # in places where a 16-bit word argument is required. # .label - regular label # :label - exported/global label # OPCODE .label - .label gets turned into the corresponding address # OPCODE :label+4 - same, but with an offset labelprefix = oneOf(': .') label = Combine(labelprefix + Word(alphanums + "_")) label.setName('label') labeloffset = Combine(labelprefix + Word(':', alphanums + "_-+")) labeloffset.setName('labeloffset') # Bytes can be represented in binary, hex, char, or a number (0-255 or -128-127) # and may include embedded arithmetic # OPCODE 0b00001100 # OPCODE 0x0b # OPCODE 'a' # OPCODE 254-0x0a # OPCODE 'a'&0b00001111 binbyte = Combine(Literal('0b') + Char('01') * 8) binbyte.setName('binbyte') binbyte.setParseAction(lambda t: [int(t[0], 2)]) hexbyte = Combine(Literal('0x') + Char(srange("[0-9a-fA-F]")) * 2) hexbyte.setName('hexbyte') hexbyte.setParseAction(lambda t: [int(t[0], 16)])
# Preposition ("stem") followed by a suffix from ind_obj_suffixes, which
# must be directly followed by the end of the string.
ind = oneOf(ind_obj_suffixes)
prep = oneOf(prepositions)
PREP_PRO = (
    prep("stem")
    + ind("suffix")
    + FollowedBy(endOfString)
)
PREP_PRO.setName('PREP_PRO')

#################
#VERBS#
#################

# Prefix, then everything up to the clitic-at-end (or the end of the
# string) as the stem, then an optional suffix.
VBZ = Combine(
    oneOf(vbz_pre_inflec)("prefix")
    + SkipTo((VBZ_CLIT + endOfString) | endOfString)("stem")
    + Optional(VBZ_SUFF)("suffix")
)
VBZ.setName('VBZ')

# VBZ followed by a suffix from dir_obj_suffixes.
VBZ_PRO = VBZ + oneOf(dir_obj_suffixes)
VBZ_PRO.setName('VBZ_PRO')

# VBZ followed by VB_IDO.
VBZ_PREP_PRO = VBZ + VB_IDO
VBZ_PREP_PRO.setName('VBZ_PREP_PRO')

# VBZ followed by VB_DO and then VB_IDO.
VBZ_PRO_PREP_PRO = VBZ + VB_DO + VB_IDO
VBZ_PRO_PREP_PRO.setName('VBZ_PRO_PREP_PRO')

# The stem is everything before a VBD_SUFF that is itself followed by the
# clitic-at-end or the end of the string; the suffix is then consumed.
VBD = Combine(
    SkipTo(VBD_SUFF + Or([VBD_CLIT + endOfString, endOfString]))("stem")
    + VBD_SUFF("suffix")
)
VBD.setName('VBD')
) ] ).setName("expression").setDebugActions(*debug)) # SQL STATEMENT selectColumn = Group( Group(expr).setName("expression1")("value").setDebugActions(*debug) + Optional(Optional(AS) + ident.copy().setName("column_name1")("name").setDebugActions(*debug)) | Literal('*')("value").setDebugActions(*debug) ).setName("column").addParseAction(to_select_call) tableName = ( ident("value").setName("table name").setDebugActions(*debug) + Optional(AS) + ident("name").setName("table alias").setDebugActions(*debug) | ident.setName("table name").setDebugActions(*debug) ) join = ((CROSSJOIN | FULLJOIN | FULLOUTERJOIN | INNERJOIN | JOIN | LEFTJOIN | LEFTOUTERJOIN | RIGHTJOIN | RIGHTOUTERJOIN)("op") + Group(tableName)("join") + Optional(ON + expr("on"))).addParseAction(to_join_call) sortColumn = expr("value").setName("sort1").setDebugActions(*debug) + Optional(DESC("sort") | ASC("sort")) | \ expr("value").setName("sort2").setDebugActions(*debug) # define SQL tokens selectStmt << Group( Group(Group( delimitedList( Group( SELECT.suppress().setDebugActions(*debug) + delimitedList(selectColumn)("select") + Optional( FROM.suppress().setDebugActions(*debug) + (delimitedList(Group(tableName)) + ZeroOrMore(join))("from") +
+ '){0,1}'\ + tld_label_regex domain_fqdn = Regex(domain_fqdn_regex) domain_fqdn.setName('<strict-fqdn>') domain_fqdn.setResultsName('domain_name') # Generic fully-qualified domain name (less stringent) domain_generic_fqdn = Combine( domain_generic_label + ZeroOrMore( Literal('.') + domain_generic_label ) + Optional(Char('.')) ) domain_generic_fqdn.setName('<generic-fqdn>') domain_generic_fqdn.setResultsName('domain_name') quoted_domain_generic_fqdn = ( Combine(squote - domain_generic_fqdn - squote) | Combine(dquote - domain_generic_fqdn - dquote) ) quoted_domain_generic_fqdn.setName('<quoted_domain_name>') quotable_domain_generic_fqdn = ( Combine(squote - domain_generic_fqdn - squote) | Combine(dquote - domain_generic_fqdn - dquote) | domain_generic_fqdn ) quotable_domain_generic_fqdn.setName('<quotable_domain_name>')
def asFloatOrInt(s, l, t):
    """Parse action: convert the first token to int, or to float if that fails.

    ``s`` and ``l`` are part of the parse-action signature and are unused.
    """
    text = t[0]
    try:
        converted = int(text)
    except ValueError:
        converted = float(text)
    return converted


# A run of decimal digits, converted by asInt.
integer = Word("0123456789")
integer.setParseAction(asInt)

# Either "[digits].digits" or a plain integer, joined into one token.
number = Combine(
    (Optional(Word("0123456789")) + Literal(".") + Word("01234567890"))
    | integer
)
number.setName('number')

sign = oneOf("+ -")

# Optionally signed number, converted by asFloat.
signedNumber = Combine(Optional(sign) + number)
signedNumber.setParseAction(asFloat)

# Same text shape as signedNumber, but kept as int when possible.
lengthValue = Combine(Optional(sign) + number)
lengthValue.setParseAction(asFloatOrInt)
lengthValue.setName('lengthValue')

lengthUnit = oneOf(
    ['em', 'ex', 'px', 'pt', 'in', 'cm', 'mm', 'pc', '%'],
    caseless=True,
)

#the spec says that the unit is only optional for a 0 length, but
#there are just too many places where a default is permitted.
#TODO: Maybe should use a ctor like optional to let clients declare it?
# A length is a value, an optional unit (None when absent), then end of input.
length = lengthValue + Optional(lengthUnit, default=None) + StringEnd()
length.leaveWhitespace()
# The nil marker; its parse action replaces the match with NilValue.
NIL = L('"-"')
NIL.setName("Nil")
NIL.setParseAction(lambda s, l, t: NilValue)

# One to three digits between LANGLE and RANGLE, converted to int.
PRIORITY = (
    LANGLE + Word(srange("[0-9]"), min=1, max=3) + RANGLE  # 191 Max
).setResultsName("priority")
PRIORITY.setName("Priority")
PRIORITY.setParseAction(lambda s, l, t: int(t[0]))

# Any run of printable characters.
TIMESTAMP = Word(printables).setResultsName("timestamp")
TIMESTAMP.setName("Timestamp")

# The nil marker or any run of printable characters.
HOSTNAME = Combine(NIL | Word(printables)).setResultsName("hostname")
HOSTNAME.setName("Hostname")

# Printable characters other than "[".
APPNAME = Word("".join(set(printables) - {"["})).setResultsName("appname")
APPNAME.setName("AppName")

# LBRACKET, printable characters other than "]", then RBRACKET.
PROCID = Combine(
    LBRACKET + Word("".join(set(printables) - {"]"})) + RBRACKET
).setResultsName("procid")
PROCID.setName("ProcID")

HEADER = PRIORITY + TIMESTAMP + SP + HOSTNAME + SP + APPNAME + PROCID

# Everything up to the end of the line.
MESSAGE = restOfLine.setResultsName("message")
MESSAGE.setName("Message")

SYSLOG_MESSAGE = HEADER + COLON + SP + MESSAGE
def asFloatOrInt(s,l,t):
    """Return the first token as an int if possible, otherwise as a float.

    Has the three-argument parse-action signature; only ``t`` is used.
    """
    raw = t[0]
    try:
        value = int(raw)
    except ValueError:
        value = float(raw)
    return value


# Digits only; asInt is attached as the parse action.
integer = Word("0123456789")
integer.setParseAction(asInt)

# "[digits].digits" is tried first, then a plain integer.
number = Combine(
    (Optional(Word("0123456789")) + Literal(".") + Word("01234567890"))
    | integer
)
number.setName('number')

sign = oneOf("+ -")

# Number with an optional leading sign, converted by asFloat.
signedNumber = Combine(Optional(sign) + number)
signedNumber.setParseAction(asFloat)

# Number with an optional leading sign, converted by asFloatOrInt.
lengthValue = Combine(Optional(sign) + number)
lengthValue.setParseAction(asFloatOrInt)
lengthValue.setName('lengthValue')

#TODO: The physical units like in, mm
lengthUnit = oneOf(['em', 'ex', 'px', 'pt', '%'], caseless=True)

#the spec says that the unit is only optional for a 0 length, but
#there are just too many places where a default is permitted.
#TODO: Maybe should use a ctor like optional to let clients declare it?
operatorPrecedence, oneOf, ParseException, ParserElement, # @UnusedImport alphas, alphanums, ParseFatalException, # @UnusedImport ParseSyntaxException, FollowedBy, NotAny, Or, # @UnusedImport MatchFirst, Keyword, Group, White, lineno, col) # @UnusedImport from typsy.parseables import Parseable O = Optional S = Suppress number = Word(nums) point = Literal('.') e = CaselessLiteral('E') plusorminus = Literal('+') | Literal('-') integer = Combine(O(plusorminus) + number) floatnumber = Combine(integer + (point + O(number)) ^ (e + integer)) integer.setName('integer') integer.setParseAction(lambda tokens: PGNative(int(tokens[0]))) floatnumber.setName('integer') floatnumber.setParseAction(lambda tokens: PGNative(float(tokens[0]))) __all__ = ['PGNative'] class PGNative(Parseable): short = 'native' @classmethod def create_from_kwargs(cls, kwargs): raise Exception() def __init__(self, value):
return None dotted_decimal = Combine( Word(nums, max=3) + Literal('.') + Word(nums, max=3) + Literal('.') + Word(nums, max=3) + Literal('.') + Word(nums, max=3)) # Bind9 naming convention ip4_addr = pyparsing_common.ipv4_address ip4_addr.setName('<ip4_addr>') ip4s_subnet = Word(nums, min=1, max=2) ip4s_subnet.setName('<ip4_or_ip4_subnet>') ip4s_prefix = Combine(ip4_addr + '/' - ip4s_subnet) ip4s_prefix.setName('<ip4subnet>') # Device Index (aka Unix sin6_scope_id) can be 32-bit integer or 64-char readable device name # _ip6_device_index = r'%([0-9]{1,10})|([a-zA-Z0-9\.\-_]{1,64})' _ip6_device_index = r'%' + \ Combine( Word(nums, min=1, max=10) | Word(alphanums, min=1, max=63) ) # Apparently, pyparsing_common.ipv6_address cannot the followingz: # - do device index suffix of "%eth0" or "%1" # - Support IPv4 notation after short or mixed IPv6 # so we roll our own IPv6 parser ########ip6_addr = pyparsing_common.ipv6_address # " ip6_addr should match: # " IPv6 addresses
def asFloatOrInt(s, l, t):
    """Parse action returning ``int(t[0])``, or ``float(t[0])`` on ValueError.

    ``s`` and ``l`` are accepted for the parse-action signature only.
    """
    token = t[0]
    try:
        parsed = int(token)
    except ValueError:
        parsed = float(token)
    return parsed


# Plain digit run, converted by asInt.
integer = Word("0123456789")
integer.setParseAction(asInt)

# Decimal form ("[digits].digits") or, failing that, an integer.
number = Combine(
    (Optional(Word("0123456789")) + Literal(".") + Word("01234567890"))
    | integer
)
number.setName("number")

sign = oneOf("+ -")

# Optionally signed number whose text is converted by asFloat.
signedNumber = Combine(Optional(sign) + number)
signedNumber.setParseAction(asFloat)

# Optionally signed number that stays an int when the text allows it.
lengthValue = Combine(Optional(sign) + number)
lengthValue.setParseAction(asFloatOrInt)
lengthValue.setName("lengthValue")

lengthUnit = oneOf(
    ["em", "ex", "px", "pt", "in", "cm", "mm", "pc", "%"],
    caseless=True,
)

# the spec says that the unit is only optional for a 0 length, but
# there are just too many places where a default is permitted.
# TODO: Maybe should use a ctor like optional to let clients declare it?
# Value, optional unit (None when missing), then the end of the string.
length = lengthValue + Optional(lengthUnit, default=None) + StringEnd()
length.leaveWhitespace()
# Unquoted file name.
filename_base = Word(charset_filename_base)
filename_base.setName(
    '<printable-chars_has_no_squote_dquote_semicolon_slash_space>')

# Double-quoted file name; the inner charset is the one allowing squotes.
filename_dquotable = Combine(
    dquote + Word(charset_filename_has_squote) + dquote)  # inverse quote types here
filename_dquotable.setName('<printable-chars_has_no_dquote_slash>')

# Single-quoted file name; the inner charset is the one allowing dquotes.
filename_squotable = Combine(
    squote + Word(charset_filename_has_dquote) + squote)  # inverse quote types here

# Quoted forms are tried before the unquoted one; result name 'filename'.
isc_file_name = (
    filename_dquotable | filename_squotable | filename_base
)('filename')
isc_file_name.setName('<file-name>')

# Same as filename_base, with '/' additionally allowed.
pathname_base = Word(charset_filename_base + '/')
pathname_base.setName('<printable-chars_has_no_squote_dquote_semicolon_space>')

# inverse quote types here
pathname_base_dquote = Word(charset_filename_has_squote + '/')
pathname_base_dquote.setName('<printable-chars_has_no_dquote>')
rr_type_set.setName('<rr_type>')

# a series of RR types (without a semicolon separator)
rr_type_series = OneOrMore(
    # remove this label so we can float result to a bigger, later label
    rr_type_set('')
)('rr_types')
rr_type_series.setName('<rr_type ...>')

# a series of RR types (with a semicolon separator)
rr_type_list_series = OneOrMore(
    rr_type_set + semicolon
)('rr_type_list')
rr_type_list_series.setName('<rr_type; ...;>')

# Following is commonly used in association with DNS zone records
rr_fqdn_w_absolute = Combine(
    domain_generic_fqdn + Optional(Literal('.'))
)
rr_fqdn_w_absolute.setName('<rr_fqdn_with_abs>')

# rr_domain_name is used in association with DNS zone records
# by 'update-policy', a zone-specific option
rr_domain_name = Combine(
    domain_generic_fqdn + Optional(Literal('.'))
)
rr_domain_name.setName('<rr_domain_name>')

# rr_domain_name may be '*.example.net', '*.congress.gov.', or '*'
rr_domain_name_or_wildcard = rr_domain_name | Char(domain_charset_wildcard)
rr_domain_name_or_wildcard.setName('<target_rr_name>')

# ( <fqdn> | '.' )
rr_domain_name_or_root = rr_domain_name | Literal('.')
rr_domain_name_or_root.setName('<rr_domain_or_root>')
# TODO: Positive real number between zero and one.
decimal = real

# String ----------------------------------------------------------------------

# Either of pyparsing's single- or double-quoted string expressions.
q_string = sglQuotedString | dblQuotedString
q_string.setName("q_string")

#double_quoted_string = QuotedString('"', multiline=True,escChar="\\",
#    unquoteResults=True) # dblQuotedString
# A double-quoted string allowing \" and \\ escapes; quotes are stripped.
double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE)
double_quoted_string.setParseAction(removeQuotes)

# One double-quoted string optionally followed by more, each preceded by
# ``pluss``; the pieces need not be adjacent and are joined into one token.
quoted_string = Combine(
    double_quoted_string
    + Optional(OneOrMore(pluss + double_quoted_string)),
    adjacent=False,
)
quoted_string.setName("word")
word = quoted_string  # Word(alphanums)

# Graph attributes ------------------------------------------------------------

# Exactly two hexadecimal digits.
hex_color = Word(hexnums, exact=2) #TODO: Optional whitespace

# "#" (dropped) followed by red, green and blue components.
rgb = (
    Literal("#").suppress()
    + hex_color.setResultsName("red")
    + hex_color.setResultsName("green")
    + hex_color.setResultsName("blue")
)
# rgb followed by an alpha component.
rgba = rgb + hex_color.setResultsName("alpha")

# Three decimals: hue, saturation, value.
hsv = (
    decimal.setResultsName("hue")
    + decimal.setResultsName("saturation")
    + decimal.setResultsName("value")
)

color_name = double_quoted_string | Word(alphas)

# Alternatives are tried in this order.
colour = rgba | rgb | hsv | color_name

#------------------------------------------------------------------------------
# A convenient function for calculating a unique name given a list of
# existing names.
attr.leaveWhitespace()
attr.setName('attr')

# Exactly two hexadecimal digits.
hexdigits = Word(string.hexdigits, exact=2)
hexdigits.setName('hexdigits')

# A backslash (dropped) followed by two hex digits.
escaped = Suppress(Literal('\\')) + hexdigits
escaped.setName('escaped')


def _p_escaped(s, l, t):
    """Parse action: turn the two hex digits into the character they encode."""
    return chr(int(t[0], 16))


escaped.setParseAction(_p_escaped)

# Any run of characters other than * ( ) \ and NUL, mixed with escapes.
value = Combine(OneOrMore(CharsNotIn('*()\\\0') | escaped))
value.setName('value')

# Each comparison token is replaced by the matching pureldap filter class.
equal = Literal("=").setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_equalityMatch)
approx = Literal("~=").setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_approxMatch)
greater = Literal(">=").setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_greaterOrEqual)
less = Literal("<=").setParseAction(
    lambda s, l, t: pureldap.LDAPFilter_lessOrEqual)

filtertype = equal | approx | greater | less
filtertype.setName('filtertype')

# attribute, comparison, value -- with no whitespace skipping.
simple = attr + filtertype + value
simple.leaveWhitespace()
simple.setName('simple')
#!/usr/bin/env python3 from pyparsing import Word, alphas, nums, Combine, oneOf # Identifier ident = Combine(oneOf("me you") + '.' + Word(alphas)) # ident.setParseAction(lambda s,l,t: [ eval(t[0]) ]) # literal values val = Combine(Word(nums) + '.' + Word(nums)) |\ Word(nums) val.setParseAction(lambda s, l, t: [float(t[0])]) val.setName("value") # Arithmetic expression expr = val + '+' + val |\ val + '-' + val |\ val + '*' + val |\ val + '/' + val |\ val # Combinations of arithmetic expressions exprs = expr + '+' + expr |\ expr + '-' + expr |\ expr + '*' + expr |\ expr + '/' + expr |\ expr # Conditional expressions c_expr = ident + '&' + ident |\ ident + '|' + ident |\
Optional(ELSE + expr("else")) + END).addParseAction(to_case_call) selectStmt = Forward() compound = ( (Literal("-")("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | (Keyword("not", caseless=True)("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | (Keyword("distinct", caseless=True)("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | Keyword("null", caseless=True).setName("null").setDebugActions(*debug) | case | (Literal("(").setDebugActions(*debug).suppress() + selectStmt + Literal(")").suppress()) | (Literal("(").setDebugActions(*debug).suppress() + Group(delimitedList(expr)) + Literal(")").suppress()) | realNum.setName("float").setDebugActions(*debug) | intNum.setName("int").setDebugActions(*debug) | sqlString.setName("string").setDebugActions(*debug) | (Word(alphas)("op").setName("function name").setDebugActions(*debug) + Literal("(").setName("func_param").setDebugActions(*debug) + Optional(selectStmt | Group(delimitedList(expr)))("params") + ")").addParseAction(to_json_call).setDebugActions(*debug) | ident.copy().setName("variable").setDebugActions(*debug)) expr << Group( infixNotation( compound, [(o, 3 if isinstance(o, tuple) else 2, opAssoc.LEFT, to_json_operator) for o in KNOWN_OPS] + [ (COLLATENOCASE, 1, opAssoc.LEFT, to_json_operator) ]).setName("expression").setDebugActions(*debug))
# Nil marker: whatever matched is replaced by NilValue.
NIL = L('"-"')
NIL.setName("Nil")
NIL.setParseAction(lambda s, l, t: NilValue)

# LANGLE, one to three digits, RANGLE; the first token is converted to int.
_priority_digits = srange("[0-9]")
PRIORITY = LANGLE + Word(_priority_digits, min=1, max=3) + RANGLE  # 191 Max
PRIORITY = PRIORITY.setResultsName("priority")
PRIORITY.setName("Priority")
PRIORITY.setParseAction(lambda s, l, t: int(t[0]))
del _priority_digits  # helper only; keep the module namespace unchanged

# Run of printable characters.
TIMESTAMP = Word(printables).setResultsName("timestamp")
TIMESTAMP.setName("Timestamp")

# Nil marker, otherwise a run of printable characters.
HOSTNAME = Combine(NIL | Word(printables))
HOSTNAME = HOSTNAME.setResultsName("hostname")
HOSTNAME.setName("Hostname")

# Run of printable characters excluding "[".
APPNAME = Word("".join(set(printables) - {"["}))
APPNAME = APPNAME.setResultsName("appname")
APPNAME.setName("AppName")

# Bracketed run of printable characters excluding "]".
PROCID = Combine(LBRACKET + Word("".join(set(printables) - {"]"})) + RBRACKET)
PROCID = PROCID.setResultsName("procid")
PROCID.setName("ProcID")

HEADER = PRIORITY + TIMESTAMP + SP + HOSTNAME + SP + APPNAME + PROCID

# Remainder of the line.
MESSAGE = restOfLine.setResultsName("message")
MESSAGE.setName("Message")

SYSLOG_MESSAGE = HEADER + COLON + SP + MESSAGE
attr.leaveWhitespace()
attr.setName('attr')

# Two hexadecimal digits, exactly.
hexdigits = Word(string.hexdigits, exact=2)
hexdigits.setName('hexdigits')

# Backslash escape: the backslash is suppressed, the digits are kept.
escaped = Suppress(Literal('\\')) + hexdigits
escaped.setName('escaped')


def _p_escaped(s, l, t):
    """Parse action converting a two-digit hex token into its character."""
    code = int(t[0], 16)
    return chr(code)


escaped.setParseAction(_p_escaped)

# Plain characters (anything but * ( ) \ NUL) and escapes, joined together.
value = Combine(OneOrMore(CharsNotIn('*()\\\0') | escaped))
value.setName('value')

# Comparison tokens; each parse action yields a pureldap filter class.
equal = Literal("=")
equal.setParseAction(lambda s, l, t: pureldap.LDAPFilter_equalityMatch)

approx = Literal("~=")
approx.setParseAction(lambda s, l, t: pureldap.LDAPFilter_approxMatch)

greater = Literal(">=")
greater.setParseAction(lambda s, l, t: pureldap.LDAPFilter_greaterOrEqual)

less = Literal("<=")
less.setParseAction(lambda s, l, t: pureldap.LDAPFilter_lessOrEqual)

filtertype = equal | approx | greater | less
filtertype.setName('filtertype')

# A simple filter item; whitespace is significant.
simple = attr + filtertype + value
simple.leaveWhitespace()
simple.setName('simple')
) # TODO: Use 'wildcard_name' to handle quotes/no-quotes '*' )('')) # ('') # ('ip_port_w') # ip4s_subnet = Word(nums, min=1, max=2) _ip4s_subnet = Regex(r'(3[0-2]|' r'[0-2][0-9]|' r'[0-9])') ip4s_subnet = _ip4s_subnet('') ip4s_subnet.setName('<ip4_subnet>') ip4_addr = pyparsing_common.ipv4_address ip4_addr.setName('<ip4_addr>') ip4_addr_or_wildcard = (wildcard_name | ip4_addr) ip4_addr_or_wildcard.setName('<ip4_addr_or_wildcard>') ip4s_prefix = Combine(ip4_addr + '/' - ip4s_subnet) ip4s_prefix.setName('<ip4subnet>') # Apparently, pyparsing_common.ipv6_address cannot the following: # - do device index suffix of "%eth0" or "%1" # - Support IPv4 notation after short or mixed IPv6 # so we roll our own IPv6 address parser # Device Index (aka Unix sin6_scope_id) can be 32-bit integer or 64-char readable device name # _ip6_device_index = r'%([0-9]{1,10})|([a-zA-Z0-9\.\-_]{1,64})' _ip6_device_index = r'%' + \ Combine( Word(nums, min=1, max=10) # Microsoft Windows | Word(alphanums, min=1, max=63) # Most *nixes ) ########ip6_addr = pyparsing_common.ipv6_address
# TODO: Positive real number between zero and one.
# For now any `real` is accepted wherever a decimal is expected.
decimal = real

# String ----------------------------------------------------------------------

# Either flavour of pyparsing's stock quoted strings.
q_string = (sglQuotedString | dblQuotedString).setName("q_string")

# Hand-written double-quoted string that allows \" and \\ escapes and may
# span lines; the surrounding quotes are stripped by removeQuotes.
# (An earlier QuotedString('"', multiline=True, escChar="\\",
# unquoteResults=True) / dblQuotedString variant was left commented out by
# the original author.)
double_quoted_string = Regex(
    r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE
).setParseAction(removeQuotes)

# One quoted string, optionally followed by `pluss`-joined continuations,
# merged into a single token (adjacent=False permits whitespace in between).
quoted_string = Combine(
    double_quoted_string
    + Optional(OneOrMore(pluss + double_quoted_string)),
    adjacent=False,
)

# `word` is the same element as `quoted_string`, labelled "word".
# (A plain Word(alphanums) was the alternative noted by the original author.)
quoted_string.setName("word")
word = quoted_string

# Graph attributes ------------------------------------------------------------

# One colour channel: exactly two hex digits.
# TODO: Optional whitespace
hex_color = Word(hexnums, exact=2)

# "#RRGGBB"; the leading "#" is dropped from the results.
rgb = (
    Literal("#").suppress()
    + hex_color.setResultsName("red")
    + hex_color.setResultsName("green")
    + hex_color.setResultsName("blue")
)

# "#RRGGBBAA"
rgba = rgb + hex_color.setResultsName("alpha")

# Three decimals: hue, saturation, value.
hsv = (
    decimal.setResultsName("hue")
    + decimal.setResultsName("saturation")
    + decimal.setResultsName("value")
)

color_name = double_quoted_string | Word(alphas)

# rgba is tried before rgb so that an alpha channel is not left unparsed.
colour = rgba | rgb | hsv | color_name

#------------------------------------------------------------------------------
# A convenient function for calculating a unique name given a list of
# existing names.
| rr_class_hesiod | rr_class_ch | rr_class_none | rr_class_any)('rr_class') rr_class_set.setName('<rr_class>') domain_charset_alphanums_dash_underscore = alphanums + '_-' domain_generic_label = Word(domain_charset_alphanums_dash_underscore, min=1, max=63) domain_generic_fqdn = Combine(domain_generic_label + ZeroOrMore(Literal('.') + domain_generic_label) + Optional(Char('.'))) domain_generic_fqdn.setName('<generic-fqdn>') domain_generic_fqdn.setResultsName('domain_name') rr_domain_name = Combine(domain_generic_fqdn + Optional(Literal('.'))) rr_domain_name.setName('<rr_domain_name>') charset_acl_name_base = alphanums + '_-.+~@$%^&*()=[]\\|:<>`?' # no semicolon nor curly braces allowed charset_view_name_base = alphanums + '_-.+~@$%^&*()=[]\\|:<>`?' # no semicolon nor curly braces allowed charset_view_name_dquotable = charset_view_name_base + "\'" charset_view_name_squotable = charset_view_name_base + '\"' view_name_base = Word(charset_acl_name_base, max=64) view_name_base.setName('<view-name-unquoted>') view_name_dquotable = Combine( Char('"') + Word(charset_view_name_dquotable, max=62) + Char('"'))
) ] ).setName("expression").setDebugActions(*debug)) # SQL STATEMENT selectColumn = Group( Group(expr).setName("expression1")("value").setDebugActions(*debug) + Optional(Optional(AS) + ident.copy().setName("column_name1")("name").setDebugActions(*debug)) | Literal('*')("value").setDebugActions(*debug) ).setName("column") tableName = ( ident("value").setName("table name").setDebugActions(*debug) + Optional(AS) + ident("name").setName("table alias").setDebugActions(*debug) | ident.setName("table name").setDebugActions(*debug) ) join = ((CROSSJOIN | INNERJOIN | JOIN)("op") + tableName("join") + Optional(ON + expr("on"))).addParseAction(to_join_call) sortColumn = expr("value").setName("sort1").setDebugActions(*debug) + Optional(DESC("sort")) | \ expr("value").setName("sort2").setDebugActions(*debug) # define SQL tokens selectStmt << Group( Group(Group( delimitedList( Group( SELECT.suppress().setDebugActions(*debug) + delimitedList(selectColumn)("select") + Optional( FROM.suppress().setDebugActions(*debug) + (delimitedList(Group(tableName)) + ZeroOrMore(join))("from") +
# An atomic operand of an expression: a literal, a parenthesised list of
# expressions, a function call, or a bare identifier.
compound = Group(
    realNum("literal").setName("float").setDebug(DEBUG)
    | intNum("literal").setName("int").setDebug(DEBUG)
    | sqlString("literal").setName("string").setDebug(DEBUG)
    | (Literal("(").suppress() + Group(delimitedList(expr)) + Literal(")").suppress()).setDebug(DEBUG)
    | (Word(alphas)("op").setName("function name") + Literal("(") + Group(delimitedList(expr))("params") + ")").addParseAction(to_json_call).setDebug(DEBUG)
    | ident
)

# Full expression: operands combined with every operator in KNOWN_OPS.
# All operators are left-associative; arity defaults to 2 when not given.
expr << Group(infixNotation(
    compound,
    [(o['literal'], o.get('arity', 2), opAssoc.LEFT, to_json_operator) for o in KNOWN_OPS]
).setName("expression"))

# SQL STATEMENT
# A select-list entry: "expr AS name", "expr name", "expr" or "*".
#
# The display name is put on a *copy* of `ident` (ident("name") copies before
# setName runs).  setName() modifies the element it is called on and returns
# it, so calling it on `ident` directly would relabel the one shared `ident`
# element used throughout the grammar, including inside `compound`.
column = Group(
    Group(expr).setName("expression")("value") + AS + ident("name").setName("column name").setDebug(DEBUG)
    | Group(expr).setName("expression")("value") + ident("name").setName("column name").setDebug(DEBUG)
    | Group(expr).setName("expression")("value").setDebug(DEBUG)
    | Literal('*')("value").setDebug(DEBUG)
).setName("column")

# Table names match exactly like identifiers but carry their own label;
# copy first so the shared `ident` element keeps its own name.
tableName = ident.copy().setName("table name")

# define SQL tokens
# SELECT <columns> FROM <tables> [WHERE expr] [GROUP BY cols] [ORDER BY cols]
selectStmt << (
    SELECT.suppress() + delimitedList(column)("select") +
    FROM.suppress() + delimitedList(tableName)("from") +
    Optional(WHERE.suppress() + Group(expr).setName("expression"))("where") +
    Optional(GROUPBY.suppress() + Group(delimitedList(column)).setName("columns"))("groupby") +
    Optional(ORDERBY.suppress() + Group(delimitedList(column)).setName("columns"))("orderby")
)