def receiver_input_rule():
    """Build the pyparsing expression for a complete receiver-input rule.

    The rule has the shape::

        ["always"] <when> <condition> [<_and> <condition>]... <then> <actions>

    where ``when``, ``_and``, ``then`` and ``actions`` are expressions
    defined elsewhere in this module, and each condition is
    ``<input path> (is|was) <operator> <value path>``.

    Results names assigned here: ``always_fire_rule``, ``conditions`` and,
    inside every condition group, ``input``, ``temporal``, ``operator`` and
    ``value``.

    Returns:
        The combined pyparsing expression.
    """
    # A dotted path such as ``a.b.c`` merged into one token.
    path = Combine(ZeroOrMore(word + ".") + word)

    # Called ``input_path`` so the builtin ``input`` is not shadowed.
    input_path = path.setResultsName("input")
    operator = oneOf(operators.keys()).setResultsName("operator")
    value = path.setResultsName("value")
    comparison = operator + value

    # ``Word("is")`` treats its argument as a *character set* and would accept
    # any run of "i"/"s" (e.g. "sis"); likewise ``Word("was")`` accepts "saw".
    # Match the two literal words instead.
    is_or_was = oneOf("is was")

    condition = Group(
        input_path + is_or_was.setResultsName("temporal") + comparison
    )
    # One or more conditions, joined by the module-level ``_and`` expression.
    res = ZeroOrMore(condition + _and) + condition
    conditions = Group(res).setResultsName("conditions")

    always = Optional("always").setResultsName("always_fire_rule")
    return always + when + conditions + then + actions
def make_enewick_parser():
    """Assemble an extended-Newick grammar and return its ``parseString``.

    A tree is a (possibly nested) parenthesised, comma-separated list of
    subtrees terminated by ``;``.  Every node may carry a label made of an
    optional name (results name ``label``), an optional ``#[type]tag``
    marker (``type`` / ``tag``) and an optional ``:length`` (``length``).
    Lengths are converted with ``cvtReal`` and tags with ``cvtInt``.

    Returns:
        The bound ``parseString`` method of the finished tree expression.
    """
    # Punctuation: everything except the decimal point is dropped from results.
    open_paren = Literal("(").suppress()
    close_paren = Literal(")").suppress()
    length_sep = Literal(":").suppress()
    list_sep = Literal(",").suppress()
    hash_mark = Literal("#").suppress()
    decimal_point = Literal(".")
    exponent_mark = CaselessLiteral("E")

    # Terminals.
    name_chars = alphanums + alphas8bit + "_-.+&/~{}*'\"\\?"
    node_name = Word(name_chars)
    type_word = Word(alphas)

    mantissa = Word("+-" + nums, nums)
    fraction = Optional(decimal_point + Optional(Word(nums)))
    exponent = Optional(exponent_mark + Word("+-" + nums, nums))
    branch_length = Combine(mantissa + fraction + exponent).setParseAction(cvtReal)

    tag_number = Combine(Word(nums)).setParseAction(cvtInt)

    # name, then optional "#[type]tag", then optional ":length".
    label = (
        Optional(node_name).setResultsName("label")
        + Optional(
            hash_mark
            + Optional(type_word).setResultsName("type")
            + tag_number.setResultsName("tag")
        )
        + Optional(length_sep + branch_length).setResultsName("length")
    )

    # The two rules below are mutually recursive, hence the Forward placeholders.
    subtree = Forward()
    subtreelist = Forward()

    inner_nodes = (open_paren + subtreelist + close_paren).setResultsName("subtree")
    subtree << Group((inner_nodes | label) + Optional(label))
    subtreelist << (subtree + Optional(list_sep + subtreelist))

    tree = subtree + Word(";").suppress()
    return tree.parseString
def receiver_input_rule():
    """Build the pyparsing expression for a complete receiver-input rule.

    The rule has the shape::

        ["always"] <when> <condition> [<_and> <condition>]... <then> <actions>

    where ``when``, ``_and``, ``then`` and ``actions`` are expressions
    defined elsewhere in this module, and each condition is
    ``<input path> (is|was) <operator> <value path>``.

    Results names assigned here: ``always_fire_rule``, ``conditions`` and,
    inside every condition group, ``input``, ``temporal``, ``operator`` and
    ``value``.

    Returns:
        The combined pyparsing expression.
    """
    # A dotted path such as ``a.b.c`` merged into one token.
    path = Combine(ZeroOrMore(word + ".") + word)

    # Called ``input_path`` so the builtin ``input`` is not shadowed.
    input_path = path.setResultsName("input")
    operator = oneOf(operators.keys()).setResultsName("operator")
    value = path.setResultsName("value")
    comparison = operator + value

    # ``Word("is")`` treats its argument as a *character set* and would accept
    # any run of "i"/"s" (e.g. "sis"); likewise ``Word("was")`` accepts "saw".
    # Match the two literal words instead.
    is_or_was = oneOf("is was")

    condition = Group(
        input_path + is_or_was.setResultsName("temporal") + comparison
    )
    # One or more conditions, joined by the module-level ``_and`` expression.
    res = ZeroOrMore(condition + _and) + condition
    conditions = Group(res).setResultsName("conditions")

    always = Optional("always").setResultsName("always_fire_rule")
    return always + when + conditions + then + actions
def expression(self):
    """Build the pyparsing grammar for an ancestral-reconstruction report.

    The grammar skips to the marker line
    ``"Ancestral reconstruction by AAML."`` and then extracts, in order:

    * ``tree``          - the branch-length tree, re-joined into one string
    * ``sites``         - per-site rows (``site``, ``freq``, ``extant``,
                          ``probability``), each collected with
                          ``listAllMatches=True``
    * ``sequences``     - ``name`` / ``sequence`` rows
    * ``probability``   - the list of reals following ``"for a site."``

    Returns:
        The combined pyparsing expression.
    """
    from pyparsing import (
        Suppress, Combine, Optional, oneOf, OneOrMore, Word, nums, Group,
        alphas, alphanums, Literal, SkipTo, empty, lineEnd,
    )

    def to_int(toks):
        return int(toks[0])

    def to_real(toks):
        return float(toks[0])

    def join_name(toks):
        # The name group may hold "node" and "#4"; drop the "#" and fuse them.
        return "".join([tok.replace("#", "") for tok in toks[0]])

    def join_tree(tokens):
        # Re-assemble the tree pieces with single spaces and restore the ";".
        return " ".join(tokens[:-1]) + ";"

    # Punctuation that is matched but dropped from the results.
    lparen, rparen, colon = map(Suppress, "():")

    sign = oneOf("+ -")
    integer = (
        Combine(Optional(sign) + Word(nums))
        .setName("integer")
        .setParseAction(to_int)
    )
    real = (
        Combine(
            Optional(oneOf("+ -"))
            + Word(nums)
            + "."
            + Optional(Word(nums))
            + Optional(oneOf("e E") + Optional(oneOf("+ -")) + Word(nums))
        )
        .setName("real")
        .setParseAction(to_real)
    )

    # TREE DEFINITION
    # ((seq2: 0.537243, seq1: 0.000004): 0.255741, seq3: 0.281503);
    tree_w_branches = (
        OneOrMore(Word("():,." + alphas + nums)) + Literal(";")
    ).setParseAction(join_tree)

    # SITE PROBABILITIES
    # site Freq Data:
    # 1 1 AAA: A(0.978) A(1.000)
    site_prob = (
        integer.setResultsName("site", listAllMatches=True)
        + integer.setResultsName("freq", listAllMatches=True)
        + Word(alphas + "-").setResultsName("extant", listAllMatches=True)
        + colon
        + Group(
            OneOrMore(Group(Word(alphas, exact=1) + lparen + real + rparen))
        ).setResultsName("probability", listAllMatches=True)
        + lineEnd
    )

    # ANCESTRAL SEQUENCES
    # seq1 ACC
    # node #4 ACC
    # The optional "#<n>" after "node" is joined into a single name.
    sequence = (
        Group(Word(alphanums) + Optional(Combine(Literal("#") + Word(nums))))
        .setParseAction(join_name)
        .setResultsName("name", listAllMatches=True)
        + Word(alphas + "- ").setResultsName("sequence", listAllMatches=True)
        + lineEnd
    )

    return (
        SkipTo(Literal("Ancestral reconstruction by AAML."), include=True).suppress()
        + tree_w_branches.setResultsName("tree")
        + SkipTo(
            Literal("site") + Literal("Freq") + Literal("Data:"),
            include=True,
        ).suppress()
        + Group(OneOrMore(site_prob)).setResultsName("sites")
        + SkipTo(
            Literal("List of extant and reconstructed sequences")
            + Word(nums)
            + Word(nums),
            include=True,
        ).suppress()
        + Group(OneOrMore(sequence)).setResultsName("sequences")
        + SkipTo(Literal("for a site."), include=True).suppress()
        + Group(OneOrMore(real)).setResultsName("probability")
        + empty
    )
def makeeNewickParser():
    """Assemble an extended-Newick grammar and return its ``parseString``.

    A tree is a (possibly nested) parenthesised, comma-separated list of
    subtrees terminated by ``;``.  Every node may carry a label made of an
    optional name (results name ``label``), an optional ``#[type]tag``
    marker (``type`` / ``tag``) and an optional ``:length`` (``length``).
    Lengths are converted with ``cvtReal`` and tags with ``cvtInt``.

    Returns:
        The bound ``parseString`` method of the finished tree expression.
    """
    # atoms (the previously unused ``semicolon`` literal has been removed)
    lparen = Literal("(").suppress()
    rparen = Literal(")").suppress()
    colon = Literal(":").suppress()
    comma = Literal(",").suppress()
    point = Literal(".")
    e = CaselessLiteral("E")
    sharp = Literal("#").suppress()

    # terminals
    name = Word(
        alphanums + alphas8bit + "_" + "-" + "." + "+" + "&" + "/" + "~"
        + "{" + "}" + "*" + "'" + '"' + '\\' + '?'
    )
    string = Word(alphas)
    fnumber = Combine(
        Word("+-" + nums, nums)
        + Optional(point + Optional(Word(nums)))
        + Optional(e + Word("+-" + nums, nums))
    ).setParseAction(cvtReal)
    number = Combine(Word(nums)).setParseAction(cvtInt)

    # name, then optional "#[type]tag", then optional ":length"
    label = (
        Optional(name).setResultsName("label")
        + Optional(
            sharp
            + Optional(string).setResultsName("type")
            + number.setResultsName("tag")
        )
        + Optional(colon + fnumber).setResultsName("length")
    )

    # subtree and subtreelist refer to each other, hence the Forwards.
    subtree = Forward()
    subtreelist = Forward()
    subtree << Group(
        ((lparen + subtreelist + rparen).setResultsName("subtree") | label)
        + Optional(label)
    )
    subtreelist << subtree + Optional(comma + subtreelist)

    # NOTE: Word(";") is a character-set match, so a run of semicolons is
    # consumed as the terminator; kept as-is for backward compatibility.
    tree = subtree + Word(";").suppress()
    return tree.parseString
def _get_handbrake_title_pattern(self):
    """Return the grammar that pulls one title entry out of a listing.

    The expression matches ``"+ title"`` followed by a number (results name
    ``title``), skips ahead to ``"+ duration:"`` and reads an ``h:m:s``
    value (``duration``), then skips to ``"+ subtitle tracks:"`` and
    collects zero or more ``+ <n> ... (iso639-2:<code>`` rows as groups
    under ``subtitles``.
    """
    digits = Word("0123456789")
    clock = Combine(digits + ":" + digits + ":" + digits)

    # Section markers; the subtitle header stays un-suppressed here because it
    # is also used as a SkipTo target below.
    title_marker = Literal("+ title").suppress()
    duration_marker = Literal("+ duration:").suppress()
    subtitle_header = Literal("+ subtitle tracks:")

    # "(iso639-2:" is dropped, the language code that follows is kept.
    iso_code = Literal('(iso639-2:').suppress() + Word(alphas)
    track_row = (
        Literal("+").suppress()
        + Group(digits + SkipTo(iso_code).suppress() + iso_code)
        + restOfLine.suppress()
    )

    return (
        title_marker
        + digits.setResultsName("title")
        + SkipTo(duration_marker).suppress()
        + duration_marker
        + clock.setResultsName("duration")
        + SkipTo(subtitle_header).suppress()
        + subtitle_header.suppress()
        + Group(ZeroOrMore(track_row)).setResultsName("subtitles")
    )
def getLogLineBNF():
    """Return the grammar for one access-log line, building it on first use.

    The expression is cached in the module-level ``logLineBNF`` global, so
    the grammar is constructed only once.

    Results names: ``ipAddr``, ``auth``, ``timestamp``, ``cmd`` (post-processed
    by ``getCmdFields``), ``statusCode``, ``numBytesSent``, ``referrer`` and
    ``clientSfw`` (both with their quotes removed).
    """
    global logLineBNF

    if logLineBNF is None:
        integer = Word(nums)
        ipAddress = delimitedList(integer, ".", combine=True)

        timeZoneOffset = Word("+-", nums)
        # string.uppercase / string.lowercase exist only on Python 2 and are
        # locale dependent; the ascii_* constants work on Python 2 and 3.
        month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
        serverDateTime = Combine(
            Suppress("[")
            + Combine(
                integer + "/" + month + "/" + integer
                + ":" + integer + ":" + integer + ":" + integer
                + ' ' + timeZoneOffset
            )
            + Suppress("]")
        )

        logLineBNF = (
            ipAddress.setResultsName("ipAddr")
            + Suppress("-")
            + ("-" | Word(alphas + nums + "@._")).setResultsName("auth")
            + serverDateTime.setResultsName("timestamp")
            + dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields)
            + (integer | "-").setResultsName("statusCode")
            + (integer | "-").setResultsName("numBytesSent")
            + dblQuotedString.setResultsName("referrer").setParseAction(removeQuotes)
            + dblQuotedString.setResultsName("clientSfw").setParseAction(removeQuotes)
        )

    return logLineBNF
PRIORITY.setParseAction(lambda s, l, t: int(t[0])) TIMESTAMP = Word(printables) TIMESTAMP = TIMESTAMP.setResultsName("timestamp") TIMESTAMP.setName("Timestamp") HOSTNAME = NIL ^ Word(printables) HOSTNAME = HOSTNAME.setResultsName("hostname") HOSTNAME.setName("Hostname") APPNAME = Word("".join(set(printables) - {"["})) APPNAME = APPNAME.setResultsName("appname") APPNAME.setName("AppName") PROCID = Combine(LBRACKET + Word("".join(set(printables) - {"]"})) + RBRACKET) PROCID = PROCID.setResultsName("procid") PROCID.setName("ProcID") HEADER = PRIORITY + TIMESTAMP + SP + HOSTNAME + SP + APPNAME + PROCID MESSAGE = restOfLine.setResultsName("message") MESSAGE.setName("Message") SYSLOG_MESSAGE = HEADER + COLON + SP + MESSAGE SYSLOG_MESSAGE.leaveWhitespace() @attr.s(slots=True, frozen=True) class SyslogMessage: facility = attr.ib(
def __init__(self):
    """Construct the load-script grammar and store it in ``self.grammar``.

    A script is one or more ``create session ...`` blocks (results name
    ``sessions``) followed by a ``create load:`` block (``load``).  Lines
    starting with ``#`` are ignored as comments.
    """
    # ---- primitive tokens -------------------------------------------------
    whole_number = Word(nums)
    decimal_number = Combine(whole_number + Optional("." + whole_number))
    quoted_text = QuotedString("'") | QuotedString('"')
    slash_pattern = QuotedString("/")
    identifier = Word(alphas, alphanums + "_")

    period_unit = Keyword("minutes") | Keyword("seconds")
    value_ordering = Keyword("unique") | Keyword("random")

    # ---- "var <name> is a ..." -------------------------------------------
    string_type = (
        Keyword("random").setResultsName("ordering")
        + Keyword("string").setResultsName("data_type")
        + Keyword("of")
        + Keyword("length")
        + whole_number.setResultsName("length")
    )
    numeric_type = (
        value_ordering.setResultsName("ordering")
        + Keyword("number").setResultsName("data_type")
        + Keyword("from")
        + decimal_number.setResultsName("min")
        + Keyword("to")
        + decimal_number.setResultsName("max")
    )
    var_type = string_type | numeric_type
    var_stmt = Group(
        Keyword("var").setResultsName("type")
        + identifier.setResultsName("name")
        + Keyword("is")
        + Keyword("a")
        + var_type
    )

    # ---- "using a, b from 'file' randomly|sequentially" --------------------
    identifier_list = delimitedList(identifier)
    source_ordering = Keyword("randomly") | Keyword("sequentially")
    using_stmt = Group(
        Keyword("using").setResultsName("type")
        + identifier_list.setResultsName("vars")
        + Keyword("from")
        + quoted_text.setResultsName("filename")
        + source_ordering.setResultsName("ordering")
    )

    # ---- "pause between N and M seconds" ---------------------------------
    pause_stmt = Group(
        Keyword("pause").setResultsName("type")
        + Keyword("between")
        + whole_number.setResultsName("lower_time")
        + Keyword("and")
        + whole_number.setResultsName("upper_time")
        + Keyword("seconds")
    )

    # ---- HTTP request ------------------------------------------------------
    get_kw = Keyword("get").setResultsName("method")
    post_kw = Keyword("post").setResultsName("method")
    put_kw = Keyword("put").setResultsName("method")
    delete_kw = Keyword("delete").setResultsName("method")
    http_method = (get_kw | post_kw | put_kw | delete_kw).setResultsName("type")

    target_url = quoted_text.setResultsName("url")
    payload = Keyword("with") + Keyword("data") + quoted_text.setResultsName("data")
    ensure_match = Group(
        Keyword("ensure") + Keyword("match") + slash_pattern.setResultsName("regex")
    )
    ensure_list = Group(OneOrMore(ensure_match)).setResultsName("matches")
    request_stmt = Group(
        http_method
        + Optional(Keyword("all")).setResultsName("all")
        + target_url
        + Optional(payload)
        + Optional(ensure_list)
    ).setName("request")

    # ---- sessions ----------------------------------------------------------
    any_action = request_stmt | pause_stmt | var_stmt | using_stmt
    actions = Group(OneOrMore(any_action)).setResultsName("actions")
    session_block = Group(
        Keyword("create")
        + Keyword("session")
        + Keyword("with")
        + Keyword("weight")
        + whole_number.setResultsName("weight")
        + Keyword("as")
        + quoted_text.setResultsName("name")
        + ":"
        + actions
    )
    sessions = OneOrMore(session_block).setResultsName("sessions")

    # ---- load profile ------------------------------------------------------
    spawn_stmt = Group(
        Keyword("spawn")
        + Keyword("users")
        + Keyword("every")
        + whole_number.setResultsName("user_time")
        + period_unit.setResultsName("user_time_units")
        + Keyword("for")
        + whole_number.setResultsName("max_duration")
        + period_unit.setResultsName("max_duration_units")
        + Optional(
            Keyword("up")
            + Keyword("to")
            + whole_number.setResultsName("max_users")
            + Keyword("users")
        )
    )
    spawns = OneOrMore(spawn_stmt).setResultsName("spawns")
    load_block = Group(
        Keyword("create") + Keyword("load") + ":" + spawns
    ).setResultsName("load")

    # ---- whole script ------------------------------------------------------
    hash_comment = "#" + restOfLine
    whole_script = sessions + load_block
    whole_script.ignore(hash_comment)

    self.grammar = whole_script
structured_data = structured_data.setResultsName('STRUCTURED_DATA')

# --- time of day -------------------------------------------------------
time_hour = Regex('0[0-9]|1[0-9]|2[0-3]')
time_minute = Regex('[0-5][0-9]')
time_second = time_minute
# Raw strings: '\.' and '\+' are invalid escape sequences in a normal string
# literal and trigger a warning on modern Python; the pattern is unchanged.
time_secfrac = Regex(r'\.[0-9]{1,6}')
time_numoffset = Or([Regex(r'\+'), Regex('-')]) + time_hour + ':' + time_minute
time_offset = Or([Regex('Z'), time_numoffset])
partial_time = time_hour + ':' + time_minute + ':' + time_second + \
    Optional(time_secfrac)
full_time = partial_time + time_offset

# --- calendar date -----------------------------------------------------
date_mday = Regex('[0-9]{2}')
date_month = Regex('0[1-9]|1[0-2]')
date_fullyear = Regex('[0-9]{4}')
full_date = date_fullyear + '-' + date_month + '-' + date_mday

# Either the nil value or "<date>T<time><offset>", merged into one token.
timestamp = Combine(Or([nilvalue, full_date + 'T' + full_time]))
timestamp = timestamp.setResultsName('TIMESTAMP')

# --- header fields -----------------------------------------------------
# Each field is the nil value or 1..N characters other than '=', ' ', ']'
# and '"'.  When SUPPORT_MISSING_VALUES is set the field may also be absent.
msgid = Or([nilvalue, CharsNotIn('= ]"', 1, 32)])
if SUPPORT_MISSING_VALUES:
    msgid = Optional(msgid)
msgid = msgid.setResultsName('MSGID')

procid = Or([nilvalue, CharsNotIn('= ]"', 1, 128)])
if SUPPORT_MISSING_VALUES:
    procid = Optional(procid)
procid = procid.setResultsName('PROCID')

app_name = Or([nilvalue, CharsNotIn('= ]"', 1, 48)])
if SUPPORT_MISSING_VALUES:
    app_name = Optional(app_name)
app_name = app_name.setResultsName('APP_NAME')

hostname = Or([nilvalue, CharsNotIn('= ]"', 1, 255)])
if SUPPORT_MISSING_VALUES:
    hostname = Optional(hostname)
colon = Literal(":") rfc3164_date = Word(nums, min=4, max=4) + dash + Word(nums, min=2, max=2) + dash + Word(nums, min=2, max=2) rfc3164_time = Word(nums, min=2, max=2) + colon + Word(nums, min=2, max=2) + colon + Word(nums, min=2, max=2) + \ Optional(Literal(".") + Word(nums, min=1, max=6)) rfc3164_timenumoffset = (Literal("-") | Literal("+")) + Word(nums, min=2, max=2) + colon + Word(nums, min=2, max=2) rfc3164_timeoffset = Literal("Z") | rfc3164_timenumoffset rfc3164_timestamp = Combine(rfc3164_date + Literal("T") + rfc3164_time + rfc3164_timeoffset) timestamp = NilValue | rfc3164_timestamp hostname = NilValue | Word(printables, min=1, max=255) appname = NilValue | Word(printables, min=1, max=48) procname = NilValue | Word(printables, min=1, max=128).setParseAction(maybeToInt) msgid = NilValue | Word(printables, min=1, max=32) header = Group( pri.setResultsName('pri') + version.setResultsName('version') + SP + timestamp.setResultsName('timestamp') + SP + hostname.setResultsName('hostname') + SP + appname.setResultsName('appname') + SP + procname.setResultsName('procname') + SP + msgid.setResultsName('msgid') ) sd_name = Word(NameAscii, min=1, max=32) sd_id = sd_name param_name = sd_name sd_param = Group( param_name.setResultsName('param_name') + Suppress(Literal("=")) + QuotedString(quoteChar='"', escChar='\\', escQuote='\\').setResultsName('param_value') ) sd_element = Group( Suppress("[") + sd_id.setResultsName('sd_id') + ZeroOrMore(SP + sd_param).setResultsName('sd_params') +
class Grammar(object):
    """Configurable pyparsing grammar for slice / interval strings.

    A *dialect* selects the separators and the allowed features (relative
    indices, stepped intervals, reverse strides, slice lists).  Dialects are
    the methods whose name starts with ``dialect_method_prefix``.  Changing
    any option marks the grammar as stale; it is rebuilt lazily by
    ``parse_text``.
    """

    # Shared building blocks (class level, reused by every instance).
    digit = Regex(r'\d')
    nonzerodigit = Regex(r'[1-9]')
    positiveinteger = Combine(nonzerodigit + ZeroOrMore(digit))
    negativeinteger = Combine('-' + positiveinteger)
    nonzerointeger = positiveinteger ^ negativeinteger
    unsignedinteger = '0' ^ positiveinteger
    integer = '0' ^ nonzerointeger
    # A valid plain-string separator: no ASCII letters or underscores.
    sep = Regex(r'[^a-zA-Z_]+$')
    dialect_method_prefix = '_dialect__'

    def __init__(self, dialect=None):
        """Create a grammar; ``dialect`` defaults to ``'slice_list'``."""
        if dialect is None:
            dialect = 'slice_list'
        self.dialect = dialect

    @property
    def dialect(self):
        """Normalized name of the active dialect (``None`` if selection failed)."""
        return self._dialect

    @dialect.setter
    def dialect(self, name):
        """Select a dialect by name (case, spaces and dashes are normalized).

        Raises:
            OptionNotFound: if no dialect method exists for ``name``.
        """
        if name:
            name = name.lower().replace(' ', '_').replace('-', '_')
        method_name = '{}{}'.format(self.__class__.dialect_method_prefix, name)
        configure = getattr(self, method_name, None) if name else None
        if configure is None:
            self._dialect = None
            error = dict(mesg='Unknown dialect', selected_option=name,
                         available_options=self.get_dialects())
            raise OptionNotFound(error)
        # Called outside any try block so that an AttributeError raised while
        # configuring the dialect is not misreported as "Unknown dialect".
        configure()
        self._dialect = name
        self._grammar_update = True

    @property
    def allow_relative_indices(self):
        """True when endpoints use the signed ``integer`` expression."""
        return self.endpoint == self.__class__.integer

    @allow_relative_indices.setter
    def allow_relative_indices(self, enabled):
        self._grammar_update = True
        endpoint = 'integer' if enabled else 'unsignedinteger'
        self.endpoint = getattr(self.__class__, endpoint)

    @property
    def allow_reverse_strides(self):
        """True when the stride uses the signed ``nonzerointeger`` expression."""
        return self.stride == self.__class__.nonzerointeger

    @allow_reverse_strides.setter
    def allow_reverse_strides(self, enabled):
        self._grammar_update = True
        stride = 'nonzerointeger' if enabled else 'positiveinteger'
        self.stride = getattr(self.__class__, stride)

    @property
    def allow_slice_list(self):
        """Whether several slices separated by ``list_sep`` are accepted."""
        return self._allow_slice_list

    @allow_slice_list.setter
    def allow_slice_list(self, enabled):
        self._grammar_update = True
        self._allow_slice_list = enabled

    @property
    def allow_stepped_intervals(self):
        """Whether the ``start<range_sep>stop<step_sep>step`` form is accepted."""
        return self._allow_stepped_intervals

    @allow_stepped_intervals.setter
    def allow_stepped_intervals(self, enabled):
        self._grammar_update = True
        self._allow_stepped_intervals = enabled

    def get_dialects(self):
        """Return the names of all dialects defined on this object."""
        prefix = self.__class__.dialect_method_prefix
        prefix_length = len(prefix)
        return [i[prefix_length:] for i in dir(self) if i.startswith(prefix)]

    def list_dialects(self, indent=' '):
        """Return a printable, one-per-line listing of the dialect names."""
        return 'Dialects:\n' + ('\n'.join(
            [indent + dialect for dialect in self.get_dialects()]))

    def _dialect__slice_list(self):
        """Base dialect: ``,`` list separator, ``:`` range and step separators."""
        self.list_sep = ','
        self.range_sep = ':'
        self.step_sep = ':'
        self.allow_relative_indices = True
        self.allow_stepped_intervals = True
        self.allow_reverse_strides = True
        self.allow_slice_list = True
        self.interval = {':': 'closed'}

    def _dialect__python_slice(self):
        """Like ``slice_list`` but only a single slice is accepted."""
        self._dialect__slice_list()
        self.allow_slice_list = False

    def _dialect__dot_notation(self):
        """Range separators ``:``, ``.:``, ``:.``, ``.:.`` and ``..``."""
        self._dialect__slice_list()
        range_sep = Combine(Optional('.') + ':' + Optional('.'))
        self.range_sep = range_sep ^ '..'
        self.interval = {
            ':': 'closed',
            '.:': 'left-open',
            ':.': 'right-open',
            '.:.': 'open',
            '..': 'open',
        }

    def _dialect__double_dot(self):
        """Range separators ``..`` (closed) and ``...`` (right-open); no steps."""
        self._dialect__slice_list()
        self.range_sep = Combine('..' + Optional('.'))
        # Was ``allow_stepped_interval`` (missing "s"), which only created an
        # unused attribute and left stepped intervals enabled.
        self.allow_stepped_intervals = False
        self.interval = {'..': 'closed', '...': 'right-open'}

    def _dialect__unix_cut(self):
        """``-`` as range separator, unsigned endpoints only, no steps."""
        self._dialect__slice_list()
        self.range_sep = '-'
        self.allow_relative_indices = False
        # Same typo fix as in ``_dialect__double_dot``.
        self.allow_stepped_intervals = False

    def validate_separators(self):
        """Check plain-string separators and convert them to ``Literal``.

        A separator given as a string must match ``sep`` (no ASCII letters
        or underscores).  Separators that are not plain strings are left
        untouched.

        Returns:
            True when every separator is acceptable.

        Raises:
            ValueError: naming the offending separator kind.
        """
        mesg = ('{} separator can\'t contain alphanumeric or '
                'underscore characters when headers are enabled.')
        for type_ in ['range', 'step', 'list']:
            attr = type_ + '_sep'
            sep = getattr(self, attr)
            if not isinstance(sep, str):
                continue
            try:
                self.sep.parseString(sep)
            except ParseException:
                # Report which separator is wrong ("Range", "Step", "List");
                # the original formatted the separator text itself here.
                raise ValueError(mesg.format(type_.title()))
            setattr(self, attr, Literal(sep))
        return True

    def _get_slice_item(self):
        """Return an expression matching one slice as a single combined token."""
        index = endpoint = self.endpoint
        short_slice = Optional(endpoint) + self.range_sep + Optional(endpoint)
        if not self.allow_stepped_intervals:
            return Combine(index ^ short_slice)
        long_slice = short_slice + self.step_sep + Optional(self.stride)
        return Combine(index ^ short_slice ^ long_slice)

    def _get_slice_list(self):
        """Return an expression for slices separated by ``list_sep``.

        A trailing separator is tolerated.
        """
        sep = Suppress(self.list_sep)
        slice_item = self._get_slice_item()
        return slice_item + ZeroOrMore(sep + slice_item) + Optional(sep)

    def _build_slice_grammar(self):
        """Return the expression that splits one slice into named parts.

        Named results: ``start``, ``stop``, ``step`` (converted to int) and
        ``range_sep``.
        """
        def to_int(tok):
            return int(tok[0])

        endpoint = self.endpoint.setResultsName
        range_sep = self.range_sep.setResultsName('range_sep')
        lower_bound = Optional(endpoint('start').setParseAction(to_int))
        upper_bound = Optional(endpoint('stop').setParseAction(to_int))
        stride = self.stride.setResultsName('step').setParseAction(to_int)
        short_slice = lower_bound + range_sep + upper_bound
        long_slice = short_slice + self.step_sep + Optional(stride)
        index = lower_bound
        if self.allow_stepped_intervals:
            return index ^ short_slice ^ long_slice
        return index ^ short_slice

    def _build_grammar(self):
        """(Re)build the text-level and slice-level grammars from the options."""
        self.validate_separators()
        self._slice_grammar = self._build_slice_grammar() + pp.stringEnd
        self._text_grammar = (self._get_slice_list()
                              if self.allow_slice_list
                              else self._get_slice_item()) + pp.stringEnd

    def parse_text(self, text):
        """Split ``text`` into slices and lazily parse each into a dict.

        Returns:
            A generator of dicts holding the named results of each slice.

        Raises:
            InvalidSliceString: if ``text`` does not match the grammar.
        """
        if self._grammar_update:
            self._build_grammar()
            self._grammar_update = False
        try:
            slices = self._text_grammar.parseString(text)
        except ParseException as error:
            info = {'text': text, 'column': error.column}
            raise InvalidSliceString(error.msg, info)
        return (dict(self._slice_grammar.parseString(i)) for i in slices)

    def parse(self, text):
        """Parse ``text`` into interval-argument dicts.

        Returns a generator when slice lists are allowed, otherwise the
        single resulting dict.
        """
        result = (self._get_interval_args(i) for i in self.parse_text(text))
        return result if self.allow_slice_list else next(result)

    def _get_interval_args(self, slice_):
        """Turn a parsed slice dict into interval arguments (in place).

        ``range_sep`` is replaced by ``type_`` (looked up in
        ``self.interval``, default ``'closed'``).  A bare index becomes the
        closed interval ``[start, start]``.
        """
        range_sep = slice_.get('range_sep')
        if range_sep:
            del slice_['range_sep']
            slice_['type_'] = self.interval.get(range_sep, 'closed')
        else:
            slice_['type_'] = 'closed'
            slice_['stop'] = slice_['start']
        return slice_
We use token.setResultsName("key") to make the key value dictionary for easily fishing results out later, much better thean indexing a list, which might change size if we change things later. Dictionary is not sensitive to that. We also use () for long lines not \ because apparently it's better. """ """ checkout - standard format grammar definition OUT product version pool# user host "isv_def" count cur_use cur_resuse \ server_handle share_handle process_id "project" "requested product" \ "requested version" mm/dd hh:mm:ss example: OUT imarisbase 6.0 9 heisenberg_lab my-workstation-72 "" 1 1 0 26e 26e 410 "" "" "" 06/16 10:57:52 """ rlmRlogCheckoutEntry_std = ( Literal("OUT").setResultsName("checkedOut") + product.setResultsName("product") + version.setResultsName("version") + poolNum + user.setResultsName("user") + host.setResultsName("host") + isvDef.setResultsName("isDef") + count.setResultsName("count") + curUse + curReuse + serverHandle.setResultsName("serverHandle") + shareHandle + processId + project + requestedProduct + requestedVersion + date.setResultsName("date") + timeHHMMSS.setResultsName("time")) """ checkout - small format grammar definition OUT product version user host "isv_def" count server_handle share_handle hh:mm example: OUT imarisbase 7.4 serrmeli my-workstation-72 "" 1 1281 7c1 14:22 """ rlmRlogCheckoutEntry_sml = (Literal("OUT").setResultsName("checkedOut") + product.setResultsName("product") + version.setResultsName("version") + user.setResultsName("user") + host.setResultsName("host") +
return int(toks[0]) def maybeToInt(s, loc, toks): if all(x.isdigit() for x in toks[0]): return int(toks[0]) else: return toks[0] pp_pri = Combine(Suppress(Literal("<")) + Word(nums, min=1, max=3) + Suppress(Literal(">"))).setParseAction(toInt).setResultsName('pri') pp_key = Word(alphas) pp_value = Or([Word(printables), QuotedString('"')]) pp_kvpair = pp_key + Suppress(Literal("=")) + pp_value pp_log_message = Group(pp_pri.setResultsName("pri") + OneOrMore(pp_kvpair).setResultsName('msg')) class ParseError(Exception): def __init__(self, description, message): self.description = description self.message = message def __repr__(self): return '{0}({1!r}, {2!r})'.format(self.__class__.__name__, self.description, self.message) # pragma: no cover def __str__(self): return '{0}: {1!r}'.format(self.description, self.message) # pragma: no cover
fishing results out later, much better thean indexing a list, which might change size if we change things later. Dictionary is not sensitive to that. We also use () for long lines not \ because apparently it's better. """ """ checkout - standard format grammar definition OUT product version pool# user host "isv_def" count cur_use cur_resuse \ server_handle share_handle process_id "project" "requested product" \ "requested version" mm/dd hh:mm:ss example: OUT imarisbase 6.0 9 heisenberg_lab my-workstation-72 "" 1 1 0 26e 26e 410 "" "" "" 06/16 10:57:52 """ rlmRlogCheckoutEntry_std = (Literal("OUT").setResultsName("checkedOut") + product.setResultsName("product") + version.setResultsName("version") + poolNum + user.setResultsName("user") + host.setResultsName("host") + isvDef.setResultsName("isDef") + count.setResultsName("count") + curUse + curReuse + serverHandle.setResultsName("serverHandle") + shareHandle + processId + project + requestedProduct + requestedVersion + date.setResultsName("date") + timeHHMMSS.setResultsName("time")
_p_hex_digit + ':' + _p_hex_digit + ':' + _p_hex_digit) _p_lease_hardware_ethernet = _ungroup( Keyword("hardware").suppress() + Keyword("ethernet").suppress() + _p_mac) _p_lease_junk = ( Word(alphas) # if we include { } ; here, they become greedy and eat the closing # brace or semicolon + CharsNotIn('{};')).suppress() _p_lease_decl = (_p_lease_deleted.setResultsName('deleted') | _p_lease_hardware_ethernet.setResultsName('mac') | _p_lease_junk) + Literal(';').suppress() _p_lease = (Keyword("lease").suppress() + _p_ip_address.setResultsName('ip') + _ungroup( nestedExpr( opener='{', closer='}', content=_p_lease_decl, ignoreExpr=quotedString, ), )).setParseAction(dictify) def parse(s): g = _p_lease.scanString(s) while True: try: (toks, start, end) = next(g) except StopIteration:
| rr_class_ch | rr_class_none | rr_class_any)('rr_class') rr_class_set.setName('<rr_class>') domain_charset_alphanums_dash_underscore = alphanums + '_-' domain_generic_label = Word(domain_charset_alphanums_dash_underscore, min=1, max=63) domain_generic_fqdn = Combine(domain_generic_label + ZeroOrMore(Literal('.') + domain_generic_label) + Optional(Char('.'))) domain_generic_fqdn.setName('<generic-fqdn>') domain_generic_fqdn.setResultsName('domain_name') rr_domain_name = Combine(domain_generic_fqdn + Optional(Literal('.'))) rr_domain_name.setName('<rr_domain_name>') charset_acl_name_base = alphanums + '_-.+~@$%^&*()=[]\\|:<>`?' # no semicolon nor curly braces allowed charset_view_name_base = alphanums + '_-.+~@$%^&*()=[]\\|:<>`?' # no semicolon nor curly braces allowed charset_view_name_dquotable = charset_view_name_base + "\'" charset_view_name_squotable = charset_view_name_base + '\"' view_name_base = Word(charset_acl_name_base, max=64) view_name_base.setName('<view-name-unquoted>') view_name_dquotable = Combine( Char('"') + Word(charset_view_name_dquotable, max=62) + Char('"')) view_name_squotable = Combine(
class Grammar(object):
    """Configurable pyparsing grammar for slice / interval strings.

    A *dialect* selects the separators and the allowed features (relative
    indices, stepped intervals, reverse strides, slice lists).  Dialects are
    the methods whose name starts with ``dialect_method_prefix``.  Changing
    any option marks the grammar as stale; it is rebuilt lazily by
    ``parse_text``.
    """

    # Shared building blocks (class level, reused by every instance).
    digit = Regex(r'\d')
    nonzerodigit = Regex(r'[1-9]')
    positiveinteger = Combine(nonzerodigit + ZeroOrMore(digit))
    negativeinteger = Combine('-' + positiveinteger)
    nonzerointeger = positiveinteger ^ negativeinteger
    unsignedinteger = '0' ^ positiveinteger
    integer = '0' ^ nonzerointeger
    # A valid plain-string separator: no ASCII letters or underscores.
    sep = Regex(r'[^a-zA-Z_]+$')
    dialect_method_prefix = '_dialect__'

    def __init__(self, dialect=None):
        """Create a grammar; ``dialect`` defaults to ``'slice_list'``."""
        if dialect is None:
            dialect = 'slice_list'
        self.dialect = dialect

    @property
    def dialect(self):
        """Normalized name of the active dialect (``None`` if selection failed)."""
        return self._dialect

    @dialect.setter
    def dialect(self, name):
        """Select a dialect by name (case, spaces and dashes are normalized).

        Raises:
            OptionNotFound: if no dialect method exists for ``name``.
        """
        if name:
            name = name.lower().replace(' ', '_').replace('-', '_')
        method_name = '{}{}'.format(self.__class__.dialect_method_prefix, name)
        configure = getattr(self, method_name, None) if name else None
        if configure is None:
            self._dialect = None
            error = dict(mesg='Unknown dialect', selected_option=name,
                         available_options=self.get_dialects())
            raise OptionNotFound(error)
        # Called outside any try block so that an AttributeError raised while
        # configuring the dialect is not misreported as "Unknown dialect".
        configure()
        self._dialect = name
        self._grammar_update = True

    @property
    def allow_relative_indices(self):
        """True when endpoints use the signed ``integer`` expression."""
        return self.endpoint == self.__class__.integer

    @allow_relative_indices.setter
    def allow_relative_indices(self, enabled):
        self._grammar_update = True
        endpoint = 'integer' if enabled else 'unsignedinteger'
        self.endpoint = getattr(self.__class__, endpoint)

    @property
    def allow_reverse_strides(self):
        """True when the stride uses the signed ``nonzerointeger`` expression."""
        return self.stride == self.__class__.nonzerointeger

    @allow_reverse_strides.setter
    def allow_reverse_strides(self, enabled):
        self._grammar_update = True
        stride = 'nonzerointeger' if enabled else 'positiveinteger'
        self.stride = getattr(self.__class__, stride)

    @property
    def allow_slice_list(self):
        """Whether several slices separated by ``list_sep`` are accepted."""
        return self._allow_slice_list

    @allow_slice_list.setter
    def allow_slice_list(self, enabled):
        self._grammar_update = True
        self._allow_slice_list = enabled

    @property
    def allow_stepped_intervals(self):
        """Whether the ``start<range_sep>stop<step_sep>step`` form is accepted."""
        return self._allow_stepped_intervals

    @allow_stepped_intervals.setter
    def allow_stepped_intervals(self, enabled):
        self._grammar_update = True
        self._allow_stepped_intervals = enabled

    def get_dialects(self):
        """Return the names of all dialects defined on this object."""
        prefix = self.__class__.dialect_method_prefix
        prefix_length = len(prefix)
        return [i[prefix_length:] for i in dir(self) if i.startswith(prefix)]

    def list_dialects(self, indent=' '):
        """Return a printable, one-per-line listing of the dialect names."""
        return 'Dialects:\n' + ('\n'.join(
            [indent + dialect for dialect in self.get_dialects()]))

    def _dialect__slice_list(self):
        """Base dialect: ``,`` list separator, ``:`` range and step separators."""
        self.list_sep = ','
        self.range_sep = ':'
        self.step_sep = ':'
        self.allow_relative_indices = True
        self.allow_stepped_intervals = True
        self.allow_reverse_strides = True
        self.allow_slice_list = True
        self.interval = {':': 'closed'}

    def _dialect__python_slice(self):
        """Like ``slice_list`` but only a single slice is accepted."""
        self._dialect__slice_list()
        self.allow_slice_list = False

    def _dialect__dot_notation(self):
        """Range separators ``:``, ``.:``, ``:.``, ``.:.`` and ``..``."""
        self._dialect__slice_list()
        range_sep = Combine(Optional('.') + ':' + Optional('.'))
        self.range_sep = range_sep ^ '..'
        self.interval = {
            ':': 'closed',
            '.:': 'left-open',
            ':.': 'right-open',
            '.:.': 'open',
            '..': 'open',
        }

    def _dialect__double_dot(self):
        """Range separators ``..`` (closed) and ``...`` (right-open); no steps."""
        self._dialect__slice_list()
        self.range_sep = Combine('..' + Optional('.'))
        # Was ``allow_stepped_interval`` (missing "s"), which only created an
        # unused attribute and left stepped intervals enabled.
        self.allow_stepped_intervals = False
        self.interval = {'..': 'closed', '...': 'right-open'}

    def _dialect__unix_cut(self):
        """``-`` as range separator, unsigned endpoints only, no steps."""
        self._dialect__slice_list()
        self.range_sep = '-'
        self.allow_relative_indices = False
        # Same typo fix as in ``_dialect__double_dot``.
        self.allow_stepped_intervals = False

    def validate_separators(self):
        """Check plain-string separators and convert them to ``Literal``.

        A separator given as a string must match ``sep`` (no ASCII letters
        or underscores).  Separators that are not plain strings are left
        untouched.

        Returns:
            True when every separator is acceptable.

        Raises:
            ValueError: naming the offending separator kind.
        """
        mesg = ('{} separator can\'t contain alphanumeric or '
                'underscore characters when headers are enabled.')
        for type_ in ['range', 'step', 'list']:
            attr = type_ + '_sep'
            sep = getattr(self, attr)
            if not isinstance(sep, str):
                continue
            try:
                self.sep.parseString(sep)
            except ParseException:
                # Report which separator is wrong ("Range", "Step", "List");
                # the original formatted the separator text itself here.
                raise ValueError(mesg.format(type_.title()))
            setattr(self, attr, Literal(sep))
        return True

    def _get_slice_item(self):
        """Return an expression matching one slice as a single combined token."""
        index = endpoint = self.endpoint
        short_slice = Optional(endpoint) + self.range_sep + Optional(endpoint)
        if not self.allow_stepped_intervals:
            return Combine(index ^ short_slice)
        long_slice = short_slice + self.step_sep + Optional(self.stride)
        return Combine(index ^ short_slice ^ long_slice)

    def _get_slice_list(self):
        """Return an expression for slices separated by ``list_sep``.

        A trailing separator is tolerated.
        """
        sep = Suppress(self.list_sep)
        slice_item = self._get_slice_item()
        return slice_item + ZeroOrMore(sep + slice_item) + Optional(sep)

    def _build_slice_grammar(self):
        """Return the expression that splits one slice into named parts.

        Named results: ``start``, ``stop``, ``step`` (converted to int) and
        ``range_sep``.
        """
        def to_int(tok):
            return int(tok[0])

        endpoint = self.endpoint.setResultsName
        range_sep = self.range_sep.setResultsName('range_sep')
        lower_bound = Optional(endpoint('start').setParseAction(to_int))
        upper_bound = Optional(endpoint('stop').setParseAction(to_int))
        stride = self.stride.setResultsName('step').setParseAction(to_int)
        short_slice = lower_bound + range_sep + upper_bound
        long_slice = short_slice + self.step_sep + Optional(stride)
        index = lower_bound
        if self.allow_stepped_intervals:
            return index ^ short_slice ^ long_slice
        return index ^ short_slice

    def _build_grammar(self):
        """(Re)build the text-level and slice-level grammars from the options."""
        self.validate_separators()
        self._slice_grammar = self._build_slice_grammar() + pp.stringEnd
        self._text_grammar = (self._get_slice_list()
                              if self.allow_slice_list
                              else self._get_slice_item()) + pp.stringEnd

    def parse_text(self, text):
        """Split ``text`` into slices and lazily parse each into a dict.

        Returns:
            A generator of dicts holding the named results of each slice.

        Raises:
            InvalidSliceString: if ``text`` does not match the grammar.
        """
        if self._grammar_update:
            self._build_grammar()
            self._grammar_update = False
        try:
            slices = self._text_grammar.parseString(text)
        except ParseException as error:
            info = {'text': text, 'column': error.column}
            raise InvalidSliceString(error.msg, info)
        return (dict(self._slice_grammar.parseString(i)) for i in slices)

    def parse(self, text):
        """Parse ``text`` into interval-argument dicts.

        Returns a generator when slice lists are allowed, otherwise the
        single resulting dict.
        """
        result = (self._get_interval_args(i) for i in self.parse_text(text))
        return result if self.allow_slice_list else next(result)

    def _get_interval_args(self, slice_):
        """Turn a parsed slice dict into interval arguments (in place).

        ``range_sep`` is replaced by ``type_`` (looked up in
        ``self.interval``, default ``'closed'``).  A bare index becomes the
        closed interval ``[start, start]``.
        """
        range_sep = slice_.get('range_sep')
        if range_sep:
            del slice_['range_sep']
            slice_['type_'] = self.interval.get(range_sep, 'closed')
        else:
            slice_['type_'] = 'closed'
            slice_['stop'] = slice_['start']
        return slice_
COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, TABLESAMPLE)) select_tok = Keyword('select', caseless=True) from_tok = Keyword('from', caseless=True) # for parsing select-from statements idr = ~keyword + Word(alphas + '*', alphanums + '_/-.*').setName('identifier') table_path = Word(''.join([c for c in printables if c not in "?"])).setResultsName('path') table_alias = idr.setResultsName('alias') table_idr = table_path + Optional(Optional(Suppress('as')) + table_alias) column_idr = delimitedList(idr, '.', combine=True) aggregate_function = Combine(Keyword('count') + '(' + column_idr + ')') column_list = Group(delimitedList((column_idr ^ aggregate_function.setResultsName('aggregate_functions', listAllMatches=True)))) # for parsing where statements and_ = Keyword('and', caseless=True) or_ = Keyword('or', caseless=True) in_ = Keyword('in', caseless=True) E = CaselessLiteral('E') binary_op = oneOf('= != < > >= <= eq ne lt le gt ge', caseless=True) arith_sign = Word('+-', exact=1) real_num = Combine( Optional(arith_sign) + ( Word(nums) + '.' + Optional( Word(nums) ) | ( '.' + Word(nums) ) ) + Optional( E + Optional(arith_sign) + Word(nums) ) )
# Header grammar for a syslog-style line.  Each field is built, given a
# results name (setResultsName returns a copy, hence the rebinding) and then
# a display name.

# NIL is produced by the helper ``L`` from the text '"-"'; whatever it matches
# is replaced by NilValue.  NOTE(review): ``L`` is defined elsewhere -
# presumably a literal matcher; confirm.
NIL = L('"-"')
NIL.setName("Nil")
NIL.setParseAction(lambda s, l, t: NilValue)

# One to three digits between LANGLE and RANGLE, converted to int.
# NOTE(review): int(t[0]) assumes LANGLE contributes no token (i.e. is
# suppressed) - confirm against its definition.
PRIORITY = LANGLE + Word(srange("[0-9]"), min=1, max=3) + RANGLE  # 191 Max
PRIORITY = PRIORITY.setResultsName("priority")
PRIORITY.setName("Priority")
PRIORITY.setParseAction(lambda s, l, t: int(t[0]))

# Any run of printable characters; no date validation is done here.
TIMESTAMP = Word(printables)
TIMESTAMP = TIMESTAMP.setResultsName("timestamp")
TIMESTAMP.setName("Timestamp")

# NIL or a run of printable characters, wrapped in Combine.
HOSTNAME = Combine(NIL | Word(printables))
HOSTNAME = HOSTNAME.setResultsName("hostname")
HOSTNAME.setName("Hostname")

# Printable characters except "[", so the name stops where PROCID begins.
APPNAME = Word("".join(set(printables) - {"["}))
APPNAME = APPNAME.setResultsName("appname")
APPNAME.setName("AppName")

# LBRACKET, printable characters other than "]", RBRACKET - merged into one
# token by Combine.
PROCID = Combine(LBRACKET + Word("".join(set(printables) - {"]"})) + RBRACKET)
PROCID = PROCID.setResultsName("procid")
PROCID.setName("ProcID")

# No SP between PRIORITY and TIMESTAMP, nor between APPNAME and PROCID.
HEADER = PRIORITY + TIMESTAMP + SP + HOSTNAME + SP + APPNAME + PROCID

# Everything up to the end of the line.
MESSAGE = restOfLine.setResultsName("message")
MESSAGE.setName("Message")
+ ":=" + OneOrMore(matrix_row).setResultsName('data')
matrix_data.setParseAction(MatrixData)

# A transposed matrix is a matrix prefixed by the literal "(tr)".
tr_matrix_data = Suppress("(tr)") + matrix_data
tr_matrix_data.setParseAction(mark_transposed)

# Slice record: "(" comp, comp, ... ")" where each component is a number,
# a symbol or "*". NotAny('tr') keeps "(tr" from being read as a slice.
set_slice_component = number | symbol | '*'
set_slice_record = LPAREN + NotAny('tr') + delimitedList(set_slice_component) + RPAREN
set_slice_record.setParseAction(SliceRecord)

# Records shared by both set_record flavours; a bare ":=" is accepted and dropped.
_set_record = set_slice_record | matrix_data | tr_matrix_data | Suppress(":=")
set_record = simple_data | _set_record
non_dimen_set_record = non_dimen_simple_data | _set_record

# set NAME [subscript domain] [dimen N] END
# NOTE(review): "set" and "dimen" are plain string literals here, so they can
# match as a prefix of a longer word -- confirm whether Keyword was intended.
set_def_stmt = "set" + symbol + Optional(subscript_domain) + \
    Optional("dimen" + integer.setResultsName('dimen')) + END
set_def_stmt.setParseAction(SetDefStmt)

# set NAME [ [member, ...] ] record {[,] record} END
set_member = LBRACKET + delimitedList(data) + RBRACKET
set_stmt = "set" + symbol + Optional(set_member).setResultsName("member") + \
    Group(non_dimen_set_record + ZeroOrMore(Optional(Suppress(',')) + set_record)) \
    .setResultsName("records") + END
set_stmt.setParseAction(SetStmt)

subscript = single
param_data = data | '.'
# Either a lone value, or one or more (optionally comma-separated) subscripts
# followed by a value.
plain_data = param_data | subscript + ZeroOrMore(Optional(Suppress(',')) + subscript) + param_data
# should not match a single (tr)
# A transposed matrix is a matrix prefixed by the literal "(tr)".
tr_matrix_data = Suppress("(tr)") + matrix_data
tr_matrix_data.setParseAction(mark_transposed)

# Slice record: "(" comp, comp, ... ")"; NotAny('tr') rejects input that
# continues with "tr" right after the opening parenthesis.
set_slice_component = number | symbol | '*'
set_slice_record = LPAREN + NotAny('tr') + delimitedList(
    set_slice_component) + RPAREN
set_slice_record.setParseAction(SliceRecord)

# Alternatives are tried left to right; a bare ":=" is matched and discarded.
_set_record = set_slice_record | matrix_data | tr_matrix_data | Suppress(
    ":=")
set_record = simple_data | _set_record
non_dimen_set_record = non_dimen_simple_data | _set_record

# set NAME [subscript domain] [dimen N] END -- "set" and "dimen" are Keywords,
# so they only match as whole words.
set_def_stmt = Keyword("set") + symbol + Optional(subscript_domain) + \
    Optional(Keyword("dimen") + integer.setResultsName('dimen')) + END
set_def_stmt.setParseAction(SetDefStmt)

# set NAME [ [member, ...] ] record {[,] record} END
set_member = LBRACKET + delimitedList(data) + RBRACKET
set_stmt = Keyword("set") + symbol + Optional(set_member).setResultsName("member") + \
    Group(non_dimen_set_record + ZeroOrMore(Optional(Suppress(',')) + set_record)) \
    .setResultsName("records") + END
set_stmt.setParseAction(SetStmt)

subscript = single
param_data = data | '.'
# A lone value, or subscripts (commas optional) followed by a value.
plain_data = param_data | subscript + ZeroOrMore(
    Optional(Suppress(',')) + subscript) + param_data
# should not match a single (tr)
# A date is three slash-separated numbers merged into one token.
date = Combine(num + "/" + num + "/" + num)


def validateDateString(tokens):
    """Parse action: reject a matched date that is not a real mm/dd/yyyy date."""
    candidate = tokens[0]
    try:
        time.strptime(candidate, "%m/%d/%Y")
    except ValueError:
        raise ParseException("Invalid date string (%s)" % candidate)


date.setParseAction(validateDateString)

# A school name is one or more alphabetic words, re-joined with single spaces.
schoolName = OneOrMore(Word(alphas))
schoolName.setParseAction(lambda tokens: " ".join(tokens))
# Scores are converted to int at parse time.
score = Word(nums).setParseAction(lambda tokens: int(tokens[0]))
schoolAndScore = Group(
    schoolName.setResultsName("school") + score.setResultsName("score")
)
gameResult = (
    date.setResultsName("date")
    + schoolAndScore.setResultsName("team1")
    + schoolAndScore.setResultsName("team2")
)

tests = """\
09/04/2004 Virginia 44 Temple 14
09/04/2004 LSU 22 Oregon State 21
09/09/2004 Troy State 24 Missouri 14
01/02/2003 Florida State 103 University of Miami 2""".splitlines()

for test in tests:
    stats = gameResult.parseString(test)
    # Scores are ints, so exactly one of >, <, == holds.
    if stats.team1.score > stats.team2.score:
        result = "won by " + stats.team1.school
    elif stats.team1.score < stats.team2.score:
        result = "won by " + stats.team2.school
    else:
        result = "tied"
a valid out is
OUT imarisbase 6.0 9 heisenberg_lab heisenberg-8-434 "" 1 1 0 26e 26e 410 "" "" "" 06/16 10:57:52
OUT product version pool# user host "isv_def" count cur_use cur_resuse server_handle share_handle process_id "project" "requested product" "requested version" mm/dd hh:mm:ss

deny is
DENY product version user host "isv_def" count why last_attempt mm/dd hh:mm

We use token.setResultsName("key") to make the key value dictionary for
easily fishing results out later, much better than indexing a list, which
might change size if we change things later. Dictionary is not sensitive to that.
We also use ( ) for long lines not \ because apparently it's better.
"""
# Checkout (OUT) record. Fields appear in the order listed in the text above;
# only the fields given a results name are retrievable by key.
rlmRlogCheckoutEntry = (
    Literal("OUT").setResultsName("checkedOut")
    + product.setResultsName("product")
    + version.setResultsName("version")
    + poolNum
    + user.setResultsName("user")
    + host.setResultsName("host")
    + isvDef.setResultsName("isDef")
    + count.setResultsName("count")
    + curUse
    + curReuse
    + serverHandle.setResultsName("serverHandle")
    + shareHandle
    + processId
    + project
    + requestedProduct
    + requestedVersion
    + date.setResultsName("date")
    + timeHHMMSS.setResultsName("time")
)
# Checkin (IN) record: a `whyIn` field follows the tag, and the record stops
# after the server handle (no share handle / process id / project fields).
rlmRlogCheckinEntry = (
    Literal("IN").setResultsName("checkedIn")
    + whyIn
    + product.setResultsName("product")
    + version.setResultsName("version")
    + user.setResultsName("user")
    + host.setResultsName("host")
    + isvDef.setResultsName("isDef")
    + count.setResultsName("count")
    + curUse
    + curReuse
    + serverHandle.setResultsName("serverHandle")
    + date.setResultsName("date")
    + timeHHMMSS.setResultsName("time")
)
# --- Timestamp: <full-date> "T" <full-time>, or the nil value -------------
# Regex patterns containing a backslash are raw strings: '\.' and '\+' are
# invalid escape sequences in ordinary string literals and trigger a
# DeprecationWarning / SyntaxWarning on current Python versions.
time_hour = Regex('0[0-9]|1[0-9]|2[0-3]')
time_minute = Regex('[0-5][0-9]')
time_second = time_minute
time_secfrac = Regex(r'\.[0-9]{1,6}')  # fractional seconds, 1-6 digits
time_numoffset = Or([Regex(r'\+'), Regex('-')]) + \
    time_hour + ':' + time_minute
time_offset = Or([Regex('Z'), time_numoffset])
partial_time = time_hour + ':' + time_minute + ':' + time_second + \
    Optional(time_secfrac)
full_time = partial_time + time_offset

date_mday = Regex('[0-9]{2}')
date_month = Regex('0[1-9]|1[0-2]')
date_fullyear = Regex('[0-9]{4}')
full_date = date_fullyear + '-' + date_month + '-' + date_mday

timestamp = Combine(Or([nilvalue, full_date + 'T' + full_time]))
timestamp = timestamp.setResultsName('TIMESTAMP')

# --- Header fields ---------------------------------------------------------
# Each field is the nil value or a bounded run of characters that excludes
# '=', space, ']' and '"'. When SUPPORT_MISSING_VALUES is set the field may
# also be absent altogether.
msgid = Or([nilvalue, CharsNotIn('= ]"', 1, 32)])
if SUPPORT_MISSING_VALUES:
    msgid = Optional(msgid)
msgid = msgid.setResultsName('MSGID')

procid = Or([nilvalue, CharsNotIn('= ]"', 1, 128)])
if SUPPORT_MISSING_VALUES:
    procid = Optional(procid)
procid = procid.setResultsName('PROCID')

app_name = Or([nilvalue, CharsNotIn('= ]"', 1, 48)])
if SUPPORT_MISSING_VALUES:
    app_name = Optional(app_name)
app_name = app_name.setResultsName('APP_NAME')

hostname = Or([nilvalue, CharsNotIn('= ]"', 1, 255)])
if SUPPORT_MISSING_VALUES:
    hostname = Optional(hostname)
arch = Array("arch", OneOrMore(valid_arch), valid_arch)

# helper definition for license, to allow license=('custom: "commercial"')
tmp_lic = opQuotedString(Word(ac_chars)) | opQuotedString(Literal("custom") + ":" + opQuotedString(Word(ac_chars)))
license = Array("license", OneOrMore(tmp_lic), tmp_lic)

# TODO: replace it with a better url parser
url = Literal("url=") + opQuotedString(Word(printables))

# The earlier hand-written `groups = Combine(Literal("groups=(") + ...)` was
# rebound by this assignment before it could be used, so only the Array form
# is kept.
groups = Array("groups", OneOrMore(opQuotedString(Word(valname))), opQuotedString(Word(valname)))

# all about dependencies
# normal dependency format: name + [qualifier] + version
# listAllMatches=True collects every occurrence under the same results name.
dependency = (opQuotedString((val_package_name.setResultsName("pname", listAllMatches=True) +
                              Optional(Group(compare_operators + vnum)).setResultsName("pversion", listAllMatches=True))))

# descriptive dependency: name + [qualifier] + version + ':' + description
descriptive_dep = (opQuotedString(val_package_name.setResultsName("pname", listAllMatches=True) +
                                  ZeroOrMore(':' + ZeroOrMore(Word(ac_chars)))))

depends = Group(Array("depends", ZeroOrMore(dependency), dependency))
makedepends = Group(Array("makedepends", ZeroOrMore(dependency), dependency))
# NOTE(review): optdepends passes only two arguments to Array, unlike its
# siblings -- confirm this is intended.
optdepends = Group(Array("optdepends", ZeroOrMore(descriptive_dep)))
checkdepends = Group(Array("checkdepends", ZeroOrMore(dependency), dependency))
provides = Array("provides", ZeroOrMore(dependency), dependency)
# pylint: disable=C0103 backtickString = Regex(r'`[^`]*`').setName("string enclosed in backticks") and_, from_, into, in_, table_key, null, where_ = \ map(upkey, ['and', 'from', 'into', 'in', 'table', 'null', 'where']) var = Word(alphas, alphanums + '_-').setName('variable').setResultsName('var') expr = Combine(Optional('m') + backtickString).setName('python expression').setResultsName('python') table = var.setResultsName('table') type_ = (upkey('string') | upkey('number') | upkey('binary'))\ .setName('type').setResultsName('type') _sign = Word('+-', exact=1) num = Combine(Optional(_sign) + Word(nums) + Optional('.' + Optional(Word(nums)))).setName('number') primitive = (null.setResultsName('null') | num.setResultsName('number') | quotedString.setResultsName('str') | Combine('b' + quotedString).setResultsName('binary')) _emptyset = Keyword('()').setResultsName('set') set_ = (Suppress('(') + delimitedList(Group(primitive)) + Suppress(')')).setResultsName('set') value = Group(primitive | expr | set_ | _emptyset).setName('value') # Wrap these in a group so they can be used independently primitive = Group(primitive | expr).setName('primitive') set_ = Group(set_ | _emptyset | expr).setName('set')
# Names of the built-in scalar types accepted after ':' in an enum header.
BASIC_TYPES = ["bool", "i8", "u8", "i16", "u16", "i32", "u32", "i64", "u64"]
# Or picks the longest-matching keyword among the basic types.
basic_type = Or([Keyword(t) for t in BASIC_TYPES])

bit_width = Word(nums)
dec_literal = Word(nums)
hex_literal = Combine("0x" + Word(hexnums))
# '^' chooses the longer of the decimal / hex readings, so "0x1F" is read as
# hex rather than as the decimal "0".
signed_literal = Combine(Optional('-') + (dec_literal ^ hex_literal))

comment = cppStyleComment

# One enumerator: [comment] name ['=' constant]
enumValue = Group(
    Optional(comment).setResultsName("doc") +
    identifier.setResultsName("name") +
    Optional('=' + signed_literal.setResultsName("constant")))

# enum NAME ':' basic_type '{' value {',' value} [','] '}'
# Wrapped in originalTextFor(..., asString=False).
enum = originalTextFor(Group(
    Optional(comment).setResultsName("doc") +
    Keyword("enum") +
    identifier.setResultsName("name") +
    ':' +
    basic_type.setResultsName("type") +
    '{' +
    delimitedList(
        enumValue.setResultsName("enum_values", listAllMatches=True), ",") +
    Optional(',') +  # tolerate a trailing comma after the last enumerator
    '}'), asString=False)

# One field: [comment] name ':' type [':' bit width] ';'
field = Group(
    Optional(comment).setResultsName("doc") +
    identifier.setResultsName("name") +
    ':' +
    qualified_identifier.setResultsName("type") +
    Optional(':' + bit_width.setResultsName("width")) +
    ';')
# timestamp - 2017-05-10T06:45:29+00:00 timestamp = Combine(date + TEE + time) # full timestamp # hostname - host hostname = Word(printables, min=1, max=255) # appname - app appname = Word(printables, min=1, max=255) # procname - web.1 procname = Word(printables, min=1, max=255) # msg - State changed from starting to up msg = restOfLine heroku_syslog_message = (pri.setResultsName('pri') + version.setResultsName('version') + SPACE + timestamp.setResultsName('timestamp') + SPACE + hostname.setResultsName('hostname') + SPACE + appname.setResultsName('appname') + SPACE + procname.setResultsName('procname') + SPACE + DASH + SPACE + msg.setResultsName('msg') + lineEnd) # Parse "key=value key=value key="value" key='value'" into a dict attr_label = Word(srange('[a-zA-Z0-9_]')) attr_value = Combine( Suppress('=') + STRIP_QUOTES + Word(printables)
colon = Literal(":") rfc3164_date = Word(nums, min=4, max=4) + dash + Word(nums, min=2, max=2) + dash + Word(nums, min=2, max=2) rfc3164_time = Word(nums, min=2, max=2) + colon + Word(nums, min=2, max=2) + colon + Word(nums, min=2, max=2) + \ Optional(Literal(".") + Word(nums, min=1, max=6)) rfc3164_timenumoffset = Or("+", "-") + Word(nums, min=2, max=2) + colon + Word(nums, min=2, max=2) rfc3164_timeoffset = Literal("Z") | rfc3164_timenumoffset rfc3164_timestamp = Combine(rfc3164_date + Literal("T") + rfc3164_time + rfc3164_timeoffset) timestamp = NilValue | rfc3164_timestamp hostname = NilValue | Word(printables, min=1, max=255) appname = NilValue | Word(printables, min=1, max=48) procname = NilValue | Word(printables, min=1, max=128).setParseAction(maybeToInt) msgid = NilValue | Word(printables, min=1, max=32) header = Group( pri.setResultsName('pri') + version.setResultsName('version') + SP + timestamp.setResultsName('timestamp') + SP + hostname.setResultsName('hostname') + SP + appname.setResultsName('appname') + SP + procname.setResultsName('procname') + SP + msgid.setResultsName('msgid') ) sd_name = Word(NameAscii, min=1, max=32) sd_id = sd_name param_name = sd_name sd_param = Group( param_name.setResultsName('param_name') + Suppress(Literal("=")) + QuotedString(quoteChar='"', escChar='\\', escQuote='\\').setResultsName('param_value') ) sd_element = Group( Suppress("[") + sd_id.setResultsName('sd_id') + ZeroOrMore(SP + sd_param).setResultsName('sd_params') +
# Any declaration we do not care about: a leading alphabetic word followed by
# everything up to the next brace or semicolon. The whole match is suppressed.
_p_lease_junk = (
    Word(alphas)
    # if we include { } ; here, they become greedy and eat the closing
    # brace or semicolon
    + CharsNotIn('{};')
).suppress()

# One ';'-terminated declaration inside a lease body. Alternatives are tried
# in order, so the junk pattern only applies when the two specific forms fail.
_p_lease_decl = (
    _p_lease_deleted.setResultsName('deleted')
    | _p_lease_hardware_ethernet.setResultsName('mac')
    | _p_lease_junk
) + Literal(';').suppress()

# lease <ip> { <declaration>; ... } -- the parsed tokens are passed to
# dictify as the parse action.
_p_lease = (
    Keyword("lease").suppress()
    + _p_ip_address.setResultsName('ip')
    + _ungroup(
        nestedExpr(
            opener='{',
            closer='}',
            content=_p_lease_decl,
            ignoreExpr=quotedString,
        ),
    )
).setParseAction(dictify)


def parse(s):
    # scanString yields matches lazily wherever _p_lease matches inside s.
    g = _p_lease.scanString(s)
    while True:
        try:
structured_data = Or([nilvalue, sd_elements.setResultsName("SD_ELEMENTS")])
structured_data = structured_data.setResultsName("STRUCTURED_DATA")

# --- Timestamp: <full-date> "T" <full-time>, or the nil value -------------
# Patterns containing a backslash are raw strings: "\." and "\+" are invalid
# escape sequences in ordinary string literals and trigger a
# DeprecationWarning / SyntaxWarning on current Python versions.
time_hour = Regex("0[0-9]|1[0-9]|2[0-3]")
time_minute = Regex("[0-5][0-9]")
time_second = time_minute
time_secfrac = Regex(r"\.[0-9]{1,6}")  # fractional seconds, 1-6 digits
time_numoffset = Or([Regex(r"\+"), Regex("-")]) + time_hour + ":" + time_minute
time_offset = Or([Regex("Z"), time_numoffset])
partial_time = time_hour + ":" + time_minute + ":" + time_second + Optional(time_secfrac)
full_time = partial_time + time_offset

date_mday = Regex("[0-9]{2}")
date_month = Regex("0[1-9]|1[0-2]")
date_fullyear = Regex("[0-9]{4}")
full_date = date_fullyear + "-" + date_month + "-" + date_mday

timestamp = Combine(Or([nilvalue, full_date + "T" + full_time]))
timestamp = timestamp.setResultsName("TIMESTAMP")

# --- Header fields ---------------------------------------------------------
# Each field is the nil value or a bounded run of characters that excludes
# '=', space, ']' and '"'. When SUPPORT_MISSING_VALUES is set the field may
# also be absent altogether.
msgid = Or([nilvalue, CharsNotIn('= ]"', 1, 32)])
if SUPPORT_MISSING_VALUES:
    msgid = Optional(msgid)
msgid = msgid.setResultsName("MSGID")

procid = Or([nilvalue, CharsNotIn('= ]"', 1, 128)])
if SUPPORT_MISSING_VALUES:
    procid = Optional(procid)
procid = procid.setResultsName("PROCID")

app_name = Or([nilvalue, CharsNotIn('= ]"', 1, 48)])
if SUPPORT_MISSING_VALUES:
    app_name = Optional(app_name)
app_name = app_name.setResultsName("APP_NAME")

hostname = Or([nilvalue, CharsNotIn('= ]"', 1, 255)])
if SUPPORT_MISSING_VALUES:
    hostname = Optional(hostname)
# RULES: Time expression # A valid time specification. Possibilities: Year, Month-Year / Year-Month, Time span (two dates) period_name = Or([ Literal("Year"), Literal("Semester"), Literal("Quarter"), Literal("Month") ]) four_digits_year = Combine( Word(nums, min=4, max=4) + Optional(Literal(".0")).suppress()) month = Word(nums, min=1, max=2) year_month_separator = oneOf("- /") date = Group( Or([ four_digits_year.setResultsName("y") + Optional(year_month_separator.suppress() + month.setResultsName("m")), Optional(month.setResultsName("m") + year_month_separator.suppress()) + four_digits_year.setResultsName("y") ])) date_month = Or([ four_digits_year + year_month_separator + month, month + year_month_separator + four_digits_year ]) two_dates_separator = oneOf("- /") time_expression = Or([ (date + Optional(two_dates_separator.suppress() + date) ).setParseAction(lambda _s, l, t: { 'type': 'time', 'dates': [{k: int(v) for k, v in d.items()} for d in t]
# A transposed matrix is a matrix prefixed by the literal "(tr)".
tr_matrix_data = Suppress("(tr)") + matrix_data
tr_matrix_data.setParseAction(mark_transposed)

# Slice record: "(" comp, comp, ... ")" with number / symbol / "*" components.
# NotAny("tr") stops "(tr" from being read as the start of a slice.
set_slice_component = number | symbol | "*"
set_slice_record = LPAREN + NotAny("tr") + delimitedList(set_slice_component) + RPAREN
set_slice_record.setParseAction(SliceRecord)

# Records common to both flavours; a bare ":=" is matched and discarded.
_set_record = set_slice_record | matrix_data | tr_matrix_data | Suppress(":=")
set_record = simple_data | _set_record
non_dimen_set_record = non_dimen_simple_data | _set_record

# set NAME [subscript domain] [dimen N] END
set_def_stmt = (
    KW_SET
    + symbol
    + Optional(subscript_domain)
    + Optional(Keyword("dimen") + integer.setResultsName("dimen"))
    + END
)
set_def_stmt.setParseAction(SetDefStmt)

# set NAME [ [member, ...] ] record {[,] record} END
# The first record uses the non-dimen variant; later ones may use any record.
set_member = LBRACKET + delimitedList(data) + RBRACKET
set_stmt = (
    KW_SET
    + symbol
    + Optional(set_member).setResultsName("member")
    + Group(
        non_dimen_set_record + ZeroOrMore(Optional(Suppress(",")) + set_record)
    ).setResultsName("records")
    + END
)
# Case-insensitive operator keywords.
or_ = Keyword("or", caseless=True)
in_ = Keyword("in", caseless=True)
is_ = Keyword("is", caseless=True)
not_ = Keyword("not", caseless=True)
between_ = Keyword("between", caseless=True)
null = Keyword("null", caseless=True)

#query elements
selectStmt = Forward()
colIdent = Word( alphanums + "_$().*[]'" ).setName("column identifier")
# A selected column is either a whole CASE ... END expression or raw text up
# to the next ",", AS, FROM or HAVING token (alternatives tried in that order).
columnName = Combine(
    (caseStart + SkipTo(caseEnd, include=True))
    | SkipTo(",", failOn=asToken)
    | SkipTo(asToken)
    | SkipTo(fromToken)
    | SkipTo(havingToken)
    , adjacent=False)
# listAllMatches=True accumulates every column under the "columns" name.
columnNameList = delimitedList( columnName.setResultsName("columns", listAllMatches=True) + Optional(asToken + colIdent) )
tableName = Word( alphanums + "_$." ).setName("table identifier")

whereExpression = Forward()
havingExpression = Forward()
# Left-hand side of a condition: a CASE expression, or column identifiers
# joined by arithmetic operators with optional parentheses.
# NOTE(review): the Optional("(") directly after the first colIdent looks like
# it may have been meant as Optional(")") -- confirm.
whereColumn = (( caseStart + SkipTo(caseEnd, include=True))
               | ( Optional("(") + colIdent + Optional("(") +
                   ZeroOrMore(arithop + Optional("(") + colIdent + Optional(")")) +
                   Optional(")") ) )
columnRval = realNum | intNum | quotedString | whereColumn # need to add support for alg expressions
condition = Group(
    ( whereColumn + binop + columnRval ) |
# getNTPservers.py # # Demonstration of the parsing module, implementing a HTML page scanner, # to extract a list of NTP time servers from the NIST web site. # # Copyright 2004, by Paul McGuire # from pyparsing import Word, Combine, Suppress, CharsNotIn, nums import urllib integer = Word(nums) ipAddress = Combine(integer + "." + integer + "." + integer + "." + integer) tdStart = Suppress("<td>") tdEnd = Suppress("</td>") timeServerPattern = tdStart + ipAddress.setResultsName("ipAddr") + tdEnd + \ tdStart + CharsNotIn("<").setResultsName("loc") + tdEnd # get list of time servers nistTimeServerURL = "http://www.boulder.nist.gov/timefreq/service/time-servers.html" serverListPage = urllib.urlopen(nistTimeServerURL) serverListHTML = serverListPage.read() serverListPage.close() addrs = {} for srvr, startloc, endloc in timeServerPattern.scanString(serverListHTML): print srvr.ipAddr, "-", srvr.loc addrs[srvr.ipAddr] = srvr.loc # or do this: #~ addr,loc = srvr #~ print addr, "-", loc
# Three floats grouped as one vector.
floatVector = Group(floatValue + floatValue + floatValue)
# One "(" float float ")" pair; the parentheses are dropped.
limit = Group(
    Suppress(Literal("(")) + floatValue + floatValue + Suppress(Literal(")")))
limits = Group(OneOrMore(limit))
# A channel token starts with one of T/R (either case) followed by X/Y/Z letters.
channel = Word("TRtr","XYZxyz")
channels = Group(OneOrMore(channel))
# Exactly three characters drawn from X, Y, Z.
rotationOrder = Word("XYZ", exact=3)
begin = Suppress(Keyword("begin"))
end = Suppress(Keyword("end"))

# A bone name is any identifier-like word except the keyword "end"; only the
# space character is skipped as leading whitespace for this element.
bonename = Combine(~end + Word(alphanums+"_-")).setWhitespaceChars(' ')

# Only the literal version string "1.10" is accepted.
version = Keyword(":version") + Literal("1.10")
skeletonName = Keyword(":name") + bonename.setResultsName('name')

# name/value pairs collected into a Dict under 'units'.
unitDefinition = Group(Word(alphas) + (floatValue | intValue | Word(alphas)))
unitSection = Keyword(":units") + \
    Dict(ZeroOrMore(unitDefinition)).setResultsName('units')

# Free text up to the next ":".
documentationSection = Keyword(':documentation') + \
    SkipTo(":").setResultsName('documentation')

# '&' (Each) accepts the operands in any order.
rootSection = Group(Keyword(":root") &
                    (Keyword("order") + channels.setResultsName('channels')) &
                    (Keyword("position") + floatVector.setResultsName('position')) &
                    (Keyword("axis") + rotationOrder.setResultsName("axisRotationOrder")) &
                    (Keyword("orientation") + floatVector.setResultsName("axis"))
                    ).setResultsName('root')
Literal('-').suppress() + Word(nums, min=1, max=2).setResultsName('day'), "pat:yyyy/m/d": year + Literal('/').suppress() + Word(nums, min=1, max=2).setResultsName('month') + Literal('/').suppress() + Word(nums, min=1, max=2).setResultsName('day'), "pat:yyyymmdd": year + month + day, "pat:mm/dd/yyyy": month + Literal("/").suppress() + day + year, "pat:mm-dd-yyyy": month + Literal("-").suppress() + day + Literal("-").suppress() + year, } COMMON_DATE_PATTERNS = { 'pat:mm/dd/yyyy': month.setResultsName('month') + Literal('/').suppress() + day.setResultsName('day') + Literal('/').suppress() + year.setResultsName('year'), 'pat:yyyy/m/d': Word(nums, exact=4).setResultsName('year') + '/' + Word(nums, min=1, max=2).setResultsName('month') + '/' + Word(nums, min=1, max=2).setResultsName('day'), 'pat:d/m/yyyy': Word(nums, min=1, max=2).setResultsName('day') + Literal('/').suppress() + Word(nums, min=1, max=2).setResultsName('month') + Literal('/').suppress() + Word(nums, exact=4).setResultsName('year'), 'pat:m/d/yy': Word(nums, min=1, max=2).setResultsName('month') + Literal('/').suppress() + Word(nums, min=1, max=2).setResultsName('day') + Literal('/').suppress() + Word(nums, exact=2).setResultsName('year'), 'pat:d/m/yy':
def __init__(self, network):
    """Store the network and build the pyparsing grammars used for policies.

    The grammars cover node queries (attribute comparisons joined by & / |),
    edge queries between two node queries, BGP match/action clauses with
    if/then/else nesting, and the library define/apply syntax.

    :param network: network object kept on ``self.network``.
    """
    self.network = network
    self.g_business_relationship = nx.DiGraph()
    self.user_defined_sets = {}
    self.user_library_calls = []
    self.user_defined_functions = {}

    # Grammars
    #TODO: tidy this up
    attribute_unnamed = Word(alphanums+'_'+".")
    attribute = attribute_unnamed.setResultsName("attribute")
    self.attribute = attribute

    # Comparison operators; each is stored under a results name equal to its symbol.
    lt = Literal("<").setResultsName("<")
    le = Literal("<=").setResultsName("<=")
    eq = Literal("=").setResultsName("=")
    ne = Literal("!=").setResultsName("!=")
    ge = Literal(">=").setResultsName(">=")
    gt = Literal(">").setResultsName(">")
    wildcard = Literal("*").setResultsName("wildcard")
    self.wildcard = wildcard

    self.prefix_lists = {}
    self.tags_to_allocate = set()
    self.allocated_tags = {}

    # Operator symbol -> callable implementing it.
    self._opn = {
        '<': operator.lt,
        '<=': operator.le,
        '=': operator.eq,
        '!=': operator.ne,
        '>=': operator.ge,
        '>': operator.gt,
        '&': set.intersection,
        '|': set.union,
    }

    # map alphanum chars to alphanum equivalents for use in tags
    self._opn_to_tag = {
        '<': "lt",
        '<=': "le",
        '=': "eq",
        '!=': "ne",
        '>=': "ge",
        '>': "gt",
        '&': "and",
        '|': "or",
    }

    # Both are of comparison to access in same manner when evaluating.
    # '|' is first-match: the two-character operators must be tried before
    # their one-character prefixes. With `lt` ahead of `le`, "<" matched
    # first, the leftover "=" broke the value, and "<=" could never parse.
    comparison = (le | lt | eq | ne | ge | gt).setResultsName("comparison")
    stringComparison = (eq | ne).setResultsName("comparison")
    #
    #quoted string is already present
    # Both numeric forms match plain digit runs; they differ only in the conversion.
    float_string = Word(nums).setResultsName("value").setParseAction(lambda t: float(t[0]))
    integer_string = Word(nums).setResultsName("value").setParseAction(lambda t: int(t[0]))
    #TODO: use numString, and make integer if fiull stop

    #TODO: allow parentheses? - should be ok as pass to the python parser

    ipField = Word(nums, max=3)
    ipAddress = Combine( ipField + "." + ipField + "." + ipField + "." + ipField ).setResultsName("ipAddress")

    boolean_and = Literal("&").setResultsName("&")
    boolean_or = Literal("|").setResultsName("|")
    boolean = (boolean_and | boolean_or).setResultsName("boolean")
    self._boolean = boolean # need to use in checking

    #TODO fix this matching 2a.ab when that should match a string
    numericQuery = Group(attribute + comparison + float_string).setResultsName( "numericQuery")

    stringValues = (attribute_unnamed | quotedString.setParseAction(removeQuotes)
            ).setResultsName("value")

    stringQuery = Group(attribute + stringComparison + stringValues).setResultsName( "stringQuery")
    wildcardQuery = wildcard.setResultsName("wildcardQuery")

    # Tried in order: numeric comparison, string comparison, then bare "*".
    singleQuery = numericQuery | stringQuery | wildcardQuery
    singleQuery.setFailAction(parse_fail_action)
    self.nodeQuery = singleQuery + ZeroOrMore(boolean + singleQuery)

    # Edge direction markers between the two node queries.
    self.u_egress = Literal("egress->").setResultsName("u_egress")
    self.v_ingress = Literal("->ingress").setResultsName("v_ingress")
    self.u_ingress = Literal("ingress<-").setResultsName("u_ingress")
    self.v_egress = Literal("<-egress").setResultsName("v_egress")
    edgeType = ( self.u_egress | self.u_ingress | self.v_egress
            | self.v_ingress).setResultsName("edgeType").setFailAction(parse_fail_action)
    # "(" query_a ")" edgeType "(" query_b ")"
    self.edgeQuery = ("(" + self.nodeQuery.setResultsName("query_a") + ")"
            + edgeType
            + "(" + self.nodeQuery.setResultsName("query_b")
            + ")").setFailAction(parse_fail_action)

    #start of BGP queries
    originQuery = (Literal("Origin").setResultsName("attribute") +
            #this is a workaround for the match, comparison, value 3-tuple in processing
            Literal("(").setResultsName("comparison") +
            Group(self.nodeQuery).setResultsName("value") + Suppress(")")).setResultsName("originQuery")
    transitQuery = (Literal("Transit").setResultsName("attribute") +
            #this is a workaround for the match, comparison, value 3-tuple in processing
            Literal("(").setResultsName("comparison") +
            Group(self.nodeQuery).setResultsName("value") + Suppress(")")).setResultsName("transitQuery")

    prefixList = Literal("prefix_list")
    matchPl = (prefixList.setResultsName("attribute")
            + comparison
            + attribute.setResultsName("value"))

    matchTag = (Literal("tag").setResultsName("attribute")
            + comparison
            + attribute.setResultsName("value"))

    #tags contain -> tag = aaa
    # The parse actions rewrite the matched words so the result has the same
    # (attribute, comparison, value) shape as matchTag.
    inTags = (
            Literal("tags").setResultsName("attribute").setParseAction(lambda x: "tag")
            + Literal("contain").setResultsName("comparison").setParseAction(lambda x: "=")
            + attribute_unnamed.setResultsName("value")
            )

    bgpMatchQuery = Group(matchPl | matchTag | inTags | originQuery | transitQuery ).setResultsName("bgpMatchQuery").setFailAction(parse_fail_action)
    self.bgpMatchQuery = bgpMatchQuery

    # BGP actions: each is a keyword stored as "attribute" plus a "value".
    setLP = (Literal("setLP").setResultsName("attribute")
            + integer_string.setResultsName("value")).setResultsName("setLP")
    setMED = (Literal("setMED").setResultsName("attribute")
            + integer_string.setResultsName("value")).setResultsName("setMED")

    addTag = (Literal("addTag").setResultsName("attribute")
            + attribute.setResultsName("value")).setResultsName("addTag")
    removeTag = (Literal("removeTag").setResultsName("attribute")
            + attribute.setResultsName("value")).setResultsName("removeTag")
    #TODO: need to set blank value
    reject = Literal("reject")
    #TODO: remove once move quagga output inside module
    self.reject = reject
    rejectAction = (reject.setResultsName("attribute") +
            Literal("route").setResultsName("value")).setResultsName("reject")
    setNextHop = (Literal("setNextHop").setResultsName("attribute") + ipAddress.setResultsName("value")).setResultsName("setNextHop")

    setOriginAttribute = (Literal("setOriginAttribute").setResultsName("attribute")
            + (oneOf("IGP BGP None").setResultsName("value"))).setResultsName("setOriginAttribute")

    bgpAction = Group(addTag | setLP | setMED | removeTag |
            setNextHop | setOriginAttribute | rejectAction).setResultsName("bgpAction")

    # The Clauses
    # Conditions and actions are chained with "&" only; the "&" is dropped.
    ifClause = Group(Suppress("if") + bgpMatchQuery
            + ZeroOrMore(Suppress(boolean_and)
                + bgpMatchQuery)).setResultsName("if_clause")

    actionClause = bgpAction + ZeroOrMore(Suppress(boolean_and) + bgpAction)
    thenClause = Group(Suppress("then") + actionClause).setResultsName("then_clause")
    ifThenClause = Group(Suppress("(") + ifClause +
            thenClause + Suppress(")")).setResultsName("ifThenClause")
    elseActionClause = Group(Suppress("(") + actionClause
            + Suppress(")")).setResultsName("else_clause")
    # Support actions without a condition (ie no "if")
    unconditionalAction = Group(Suppress("(")
        + Group(actionClause).setResultsName("unconditionalActionClause")
        + Suppress(")")).setResultsName("bgpSessionQuery")

    # Query may contain itself (nested)
    bgpSessionQuery = Forward()
    bgpSessionQuery << ( ifThenClause +
            Optional( Suppress("else") + (elseActionClause | bgpSessionQuery))
            ).setResultsName("bgpSessionQuery")
    bgpSessionQuery = bgpSessionQuery | unconditionalAction
    self.bgpSessionQuery = bgpSessionQuery

    self.bgpApplicationQuery = self.edgeQuery + Suppress(":") + self.bgpSessionQuery

    # Library stuff
    set_values = Suppress("{") + delimitedList( attribute, delim=',').setResultsName("set_values") + Suppress("}")
    #Set to empty set, rather than empty list as empty list is processed differently somewhere in parser
    empty_set = Literal("{}").setResultsName("set_values").setParseAction(lambda x: set())
    self.set_definition = attribute.setResultsName("set_name") + Suppress("=") + (empty_set | set_values)

    library_params = attribute | Group(set_values) | empty_set
    library_function = attribute.setResultsName("def_name") + Suppress("(") + delimitedList( library_params, delim=',').setResultsName("def_params") + Suppress(")")
    library_function.setFailAction(parse_fail_action)

    self.library_def = Suppress("define") + library_function

    self.library_call = Suppress("apply") + library_function
    self.library_def.setFailAction(parse_fail_action)
    self.library_edge_query = (self.attribute.setResultsName("query_a")
            + edgeType + self.attribute.setResultsName("query_b"))
    self.library_edge_query.setFailAction(parse_fail_action)
    library_edge_definition = self.library_edge_query + Suppress(":") + self.bgpSessionQuery
    library_global_definition = "global tags = {" + delimitedList( attribute, delim=',').setResultsName("tags") + "}"
    self.library_entry = library_global_definition.setResultsName("global_tags") | library_edge_definition.setResultsName("library_edge")
    self.library_entry.setFailAction(parse_fail_action)

    # A policy line is an edge query with a session query, a library call,
    # or a set definition (tried in that order).
    self.bgpPolicyLine = (
            self.bgpApplicationQuery.setResultsName("bgpApplicationQuery")
            | self.library_call.setResultsName("library_call")
            | self.set_definition.setResultsName("set_definition")
            )
# getNTPserversNew.py # # Demonstration of the parsing module, implementing a HTML page scanner, # to extract a list of NTP time servers from the NIST web site. # # Copyright 2004, by Paul McGuire # from pyparsing import Word, Combine, Suppress, SkipTo, nums, makeHTMLTags import urllib integer = Word(nums) ipAddress = Combine( integer + "." + integer + "." + integer + "." + integer ) tdStart,tdEnd = makeHTMLTags("td") timeServerPattern = tdStart + ipAddress.setResultsName("ipAddr") + tdEnd + \ tdStart + SkipTo(tdEnd).setResultsName("loc") + tdEnd # get list of time servers nistTimeServerURL = "http://www.boulder.nist.gov/timefreq/service/time-servers.html" serverListPage = urllib.urlopen( nistTimeServerURL ) serverListHTML = serverListPage.read() serverListPage.close() addrs = {} for srvr,startloc,endloc in timeServerPattern.scanString( serverListHTML ): print srvr.ipAddr, "-", srvr.loc addrs[srvr.ipAddr] = srvr.loc
# A transposed matrix is a matrix prefixed by the literal "(tr)".
tr_matrix_data = Suppress("(tr)") + matrix_data
tr_matrix_data.setParseAction(mark_transposed)

# Slice record: "(" comp, comp, ... ")"; NotAny('tr') rejects input that
# continues with "tr" right after the opening parenthesis.
set_slice_component = number | symbol | '*'
set_slice_record = LPAREN + NotAny('tr') + delimitedList(
    set_slice_component) + RPAREN
set_slice_record.setParseAction(SliceRecord)

# Alternatives are tried left to right; a bare ":=" is matched and discarded.
_set_record = set_slice_record | matrix_data | tr_matrix_data | Suppress(
    ":=")
set_record = simple_data | _set_record
non_dimen_set_record = non_dimen_simple_data | _set_record

# set NAME [subscript domain] [dimen N] END
# NOTE(review): "set" and "dimen" are plain string literals here, so they can
# match as a prefix of a longer word -- confirm whether Keyword was intended.
set_def_stmt = "set" + symbol + Optional(subscript_domain) + \
    Optional("dimen" + integer.setResultsName('dimen')) + END
set_def_stmt.setParseAction(SetDefStmt)

# set NAME [ [member, ...] ] record {[,] record} END
set_member = LBRACKET + delimitedList(data) + RBRACKET
set_stmt = "set" + symbol + Optional(set_member).setResultsName("member") + \
    Group(non_dimen_set_record + ZeroOrMore(Optional(Suppress(',')) + set_record)) \
    .setResultsName("records") + END
set_stmt.setParseAction(SetStmt)

subscript = single
param_data = data | '.'
# A lone value, or subscripts (commas optional) followed by a value.
plain_data = param_data | subscript + ZeroOrMore(
    Optional(Suppress(',')) + subscript) + param_data
# should not match a single (tr)
# NOTE(review): this excerpt starts inside an if/elif chain whose enclosing loops are out of view; the indentation below is reconstructed from the control flow -- confirm against the full file.
        elif arg.startswith('@'):
            # '@' arguments are matched as a plain word, appended to whatever
            # argument grammar has been built so far.
            if arg_grammar:
                arg_grammar = arg_grammar + word
            else:
                arg_grammar = word
        else:
            raise SyntaxError("Argument with unknown sigil: {}".format(arg))
    # Opcodes with arguments expose them under the 'args' results name.
    if arg_grammar:
        opcode_syntax.append(op_grammar + arg_grammar.setResultsName('args'))
    else:
        opcode_syntax.append(op_grammar)
opcode = Or(opcode_syntax)

# Grammar is all of this OR'd
# '+' binds tighter than '|', so this reads as:
#   (StringStart() + (data ^ label ^ opcode) + [comment])  |  [comment]
grammar = StringStart() + (
    (data.setResultsName('data') ^ label.setResultsName('label') ^ opcode) +
    Optional(comment)) | Optional(comment)
logging.info("Generated grammar")
logging.debug(grammar)

#####
# Replace ASM macros and localize labels in the source
#####
logging.info("Replacing ASM macros and localizing labels")
line_num = 0
concat_source = []
for input_file in args.sources:
    # Prefix derived from the file's base name (text before the first '.'),
    # spaces replaced by '_', upper-cased, with a trailing '_'.
    label_prefix = "{}_".format(
        input_file.split('/')[-1].split('.')[0].replace(' ', '_')).upper()
    with open(input_file, 'r') as fh:
        while True:
def __init__(self, query):
    """Compile the search query and build the log-line grammar.

    ``query`` is lower-cased and parsed into a tree stored on
    ``self._query_parser``; an empty or ``None`` query stores ``False``
    instead. ``self._log_parser`` is built in either case.
    """
    # Dispatch table keyed by the results names used in the query grammar.
    self._methods = {
        'and': self.evaluate_and,
        'or': self.evaluate_or,
        'not': self.evaluate_not,
        'parenthesis': self.evaluate_parenthesis,
        'quotes': self.evaluate_quotes,
        'word': self.evaluate_word,
    }
    self.line = ''
    self.query = query.lower() if query else ''

    if not self.query:
        self._query_parser = False
    else:
        # TODO: Cleanup
        or_expr = Forward()
        term = Group(Word(alphanums)).setResultsName('word')

        # One or more words, right-recursive, used inside double quotes.
        phrase_body = Forward()
        phrase_body << ((term + phrase_body) | term)

        quoted = Group(Suppress('"') + phrase_body + Suppress('"'))
        phrase = quoted.setResultsName('quotes') | term

        bracketed = Group(Suppress('(') + or_expr + Suppress(")"))
        atom = bracketed.setResultsName('parenthesis') | phrase

        # Negation is spelled with the keyword 'no'.
        not_expr = Forward()
        negated = Group(Suppress(Keyword('no', caseless=True)) + not_expr)
        not_expr << (negated.setResultsName('not') | atom)

        # 'and' is either explicit or implied by adjacent terms.
        and_expr = Forward()
        explicit_and = Group(
            not_expr + Suppress(Keyword('and', caseless=True)) + and_expr)
        implicit_and = Group(
            not_expr + OneOrMore(~oneOf('and or') + and_expr))
        and_expr << (
            explicit_and.setResultsName('and')
            | implicit_and.setResultsName('and')
            | not_expr
        )

        either = Group(
            and_expr + Suppress(Keyword('or', caseless=True)) + or_expr)
        or_expr << (either.setResultsName('or') | and_expr)

        self._query_parser = or_expr.parseString(self.query)[0]

    # Log line: timestamp, level, plugin, task, then the rest as message.
    two_digits = Word(nums).setParseAction(lambda t: t[0].zfill(2))
    stamp = Combine(
        (two_digits + '-' + two_digits + '-' + two_digits)
        + ' ' + two_digits + ':' + two_digits)
    token = Word(printables)

    # The task column is either 16+ whitespace characters (stripped down
    # to an empty string) or whitespace together with a single word.
    blank_task = White(min=16).setParseAction(
        lambda s, l, t: [t[0].strip()]).setResultsName('task')
    named_task = White(min=1).suppress() & token.setResultsName('task')

    self._log_parser = (
        stamp.setResultsName('timestamp')
        + token.setResultsName('log_level')
        + token.setResultsName('plugin')
        + (blank_task | named_task)
        + restOfLine.setResultsName('message')
    )
_sign = Word('+-', exact=1) number = Combine(Optional(_sign) + Word(nums) + Optional('.' + Optional(Word(nums)))) \ .setName('number').setResultsName('number') integer = Combine(Optional(_sign) + Word(nums)) \ .setName('number').setResultsName('number') boolean = (upkey('true') | upkey('false')).setName('bool') binary = Combine('b' + quotedString) value = Forward() json_value = Forward() string = quotedString.setResultsName('str') json_primitive = (null.setResultsName('null') | number | string | boolean.setResultsName('bool')) set_primitive = (number.setResultsName('number') | quotedString.setResultsName('str') | binary.setResultsName('binary')) primitive = (json_primitive | binary.setResultsName('binary')) _emptyset = Keyword('()').setResultsName('set') set_ = (Suppress('(') + delimitedList(Group(set_primitive)) + Suppress(')')).setResultsName('set') list_ = (Suppress('[') + Optional(delimitedList(json_value)) + Suppress(']')).setResultsName('list') key_val = (Group(quotedString.setResultsName('str')) + Suppress(':') + json_value) dict_ = (Suppress('{') + Optional(delimitedList(Group(key_val))) + Suppress('}')).setResultsName('dict') json_value <<= Group(json_primitive | list_ | dict_) ts_functions = Group(
# Optionally signed digit run, named and reported as "number".
integer = Combine(
    Optional(_sign) + Word(nums)
).setName("number").setResultsName("number")
boolean = (upkey("true") | upkey("false")).setName("bool")
# A quoted string prefixed with b.
binary = Combine("b" + quotedString)

# Forward declarations; only json_value is given a definition in this span.
value = Forward()
json_value = Forward()

string = quotedString.setResultsName("str")
json_primitive = (
    null.setResultsName("null")
    | number
    | string
    | boolean.setResultsName("bool")
)
# Set members: numbers, strings and binary literals only.
set_primitive = (
    number.setResultsName("number")
    | quotedString.setResultsName("str")
    | binary.setResultsName("binary")
)
primitive = json_primitive | binary.setResultsName("binary")

_emptyset = Keyword("()").setResultsName("set")
# ( member, member, ... ) with each member grouped separately.
set_ = (
    Suppress("(")
    + delimitedList(Group(set_primitive))
    + Suppress(")")
).setResultsName("set")
# [ value, value, ... ], possibly empty.
list_ = (
    Suppress("[")
    + Optional(delimitedList(json_value))
    + Suppress("]")
).setResultsName("list")
# "key": value
key_val = (
    Group(quotedString.setResultsName("str"))
    + Suppress(":")
    + json_value
)
# { "key": value, ... }, possibly empty.
dict_ = (
    Suppress("{")
    + Optional(delimitedList(Group(key_val)))
    + Suppress("}")
).setResultsName("dict")

# Lists and dicts refer back to json_value, hence the Forward.
json_value <<= Group(json_primitive | list_ | dict_)
intValue = Word(nums).setParseAction(lambda s, l, t: int(t[0])) floatValue = Regex(r'-?\d+(\.\d*)?(e-?\d*)?').setParseAction( lambda s, l, t: float(t[0])) floatVector = Group(floatValue + floatValue + floatValue) limit = Group( Suppress(Literal("(")) + floatValue + floatValue + Suppress(Literal(")"))) limits = Group(OneOrMore(limit)) channel = Word("TRtr", "XYZxyz") channels = Group(OneOrMore(channel)) rotationOrder = Word("XYZ", exact=3) begin = Suppress(Keyword("begin")) end = Suppress(Keyword("end")) bonename = Combine(~end + Word(alphanums + "_-")).setWhitespaceChars(' ') version = Keyword(":version") + Literal("1.10") skeletonName = Keyword(":name") + bonename.setResultsName('name') unitDefinition = Group(Word(alphas) + (floatValue | intValue | Word(alphas))) unitSection = Keyword(":units") + \ Dict(ZeroOrMore(unitDefinition)).setResultsName('units') documentationSection = Keyword(':documentation') + \ SkipTo(":").setResultsName('documentation') rootSection = Group( Keyword(":root") & (Keyword("order") + channels.setResultsName('channels')) & (Keyword("position") + floatVector.setResultsName('position')) & (Keyword("axis") + rotationOrder.setResultsName("axisRotationOrder")) & (Keyword("orientation") + floatVector.setResultsName("axis"))).setResultsName('root') bone = Group(begin + Keyword("id") + intValue + Keyword("name") + bonename.setResultsName("name") + Keyword("direction") + floatVector.setResultsName("direction") + Keyword("length") + floatValue.setResultsName("length") + Keyword("axis") +
# getNTPservers.py
#
# Demonstration of the parsing module, implementing a HTML page scanner,
# to extract a list of NTP time servers from the NIST web site.
#
# Copyright 2004, by Paul McGuire
#
from pyparsing import Word, Combine, Suppress, CharsNotIn, nums
import urllib

try:
    # Python 3 keeps urlopen in urllib.request.
    from urllib.request import urlopen
except ImportError:
    # Python 2 exposes it directly on the urllib module.
    urlopen = urllib.urlopen

# Four dot-separated digit runs, merged into a single token.
integer = Word(nums)
ipAddress = Combine(integer + "." + integer + "." + integer + "." + integer)

# Each server is listed as two adjacent <td> cells: address, then location.
tdStart = Suppress("<td>")
tdEnd = Suppress("</td>")
timeServerPattern = (
    tdStart
    + ipAddress.setResultsName("ipAddr")
    + tdEnd
    + tdStart
    + CharsNotIn("<").setResultsName("loc")
    + tdEnd
)

# get list of time servers
nistTimeServerURL = "http://www.boulder.nist.gov/timefreq/service/time-servers.html"
serverListPage = urlopen(nistTimeServerURL)
try:
    serverListHTML = serverListPage.read()
finally:
    # Release the connection even when the read fails.
    serverListPage.close()
if not isinstance(serverListHTML, str):
    # Python 3 hands back bytes; the scanner below needs text.
    serverListHTML = serverListHTML.decode("utf-8", "replace")

# Map each server address to its location text, echoing every match.
addrs = {}
for srvr, startloc, endloc in timeServerPattern.scanString(serverListHTML):
    # Single-argument print form behaves the same on Python 2 and 3.
    print("%s - %s" % (srvr.ipAddr, srvr.loc))
    addrs[srvr.ipAddr] = srvr.loc
    # or do this:
    # ~ addr,loc = srvr
    # ~ print("%s - %s" % (addr, loc))
select_tok = Keyword('select', caseless=True)
from_tok = Keyword('from', caseless=True)

# for parsing select-from statements
# An identifier must not be one of the reserved `keyword` matches.
idr = ~keyword + Word(alphas + '*', alphanums + '_/-.*').setName('identifier')
# A table path is any run of printable characters other than "?".
table_path = Word(printables.replace("?", "")).setResultsName('path')
table_alias = idr.setResultsName('alias')
# path [["as"] alias]
table_idr = table_path + Optional(Optional(Suppress('as')) + table_alias)
# Dotted identifiers, merged back into a single token.
column_idr = delimitedList(idr, '.', combine=True)
aggregate_function = Combine(Keyword('count') + '(' + column_idr + ')')
column_list = Group(
    delimitedList(
        column_idr
        ^ aggregate_function.setResultsName(
            'aggregate_functions', listAllMatches=True)
    )
)

# for parsing where statements
and_ = Keyword('and', caseless=True)
or_ = Keyword('or', caseless=True)
in_ = Keyword('in', caseless=True)
E = CaselessLiteral('E')
binary_op = oneOf('= != < > >= <= eq ne lt le gt ge', caseless=True)
arith_sign = Word('+-', exact=1)
# [sign] (digits "." [digits] | "." digits) [E [sign] digits]
real_num = Combine(
    Optional(arith_sign)
    + (Word(nums) + '.' + Optional(Word(nums)) | ('.' + Word(nums)))
    + Optional(E + Optional(arith_sign) + Word(nums))
)
+ tld_label_regex domain_fqdn = Regex(domain_fqdn_regex) domain_fqdn.setName('<strict-fqdn>') domain_fqdn.setResultsName('domain_name') # Generic fully-qualified domain name (less stringent) domain_generic_fqdn = Combine( domain_generic_label + ZeroOrMore( Literal('.') + domain_generic_label ) + Optional(Char('.')) ) domain_generic_fqdn.setName('<generic-fqdn>') domain_generic_fqdn.setResultsName('domain_name') quoted_domain_generic_fqdn = ( Combine(squote - domain_generic_fqdn - squote) | Combine(dquote - domain_generic_fqdn - dquote) ) quoted_domain_generic_fqdn.setName('<quoted_domain_name>') quotable_domain_generic_fqdn = ( Combine(squote - domain_generic_fqdn - squote) | Combine(dquote - domain_generic_fqdn - dquote) | domain_generic_fqdn ) quotable_domain_generic_fqdn.setName('<quotable_domain_name>') # Following is commonly used in association with DNS zone records
def build_grammer():
    """Build and return the pyparsing grammar for graph queries.

    The returned expression tries, in order: a sampled select, a compound
    select (selects joined by set operators), a subgraph select, and a set
    expression over identifiers. ``--`` comments running to the end of the
    line are ignored.
    """
    select_stmt = Forward()
    compound_select_stmt = Forward()
    subgraph_select_stmt = Forward()
    sample_select_stmt = Forward()
    set_stmt = Forward()

    # Keywords and keyword-like token sets (all case-insensitive).
    select_kw = oneOf("select find get what search list", caseless=True)
    from_kw = Keyword("from", caseless=True)
    where_kw = Keyword("where", caseless=True)
    sample_kw = Keyword("sample", caseless=True)
    subgraph_kw = Keyword("subgraph", caseless=True)
    neighbors_kw = Keyword("neighbors", caseless=True)
    target_kw = oneOf("edges nodes node edge", caseless=True)

    ident = Word(alphas, alphanums + "_$").setName("identifier")

    # v.attr / u.attr / e.attr, neighbors(N).attr, or a bare identifier.
    column_name = (
        Combine(oneOf("v u e") + "." + ident)
        | Combine(
            neighbors_kw
            + "("
            + Word(nums).setResultsName("friends", listAllMatches=True)
            + ")"
            + "."
            + ident
        )
        | ident
    )

    def column():
        # A fresh copy of column_name collected under "column".
        return column_name.setResultsName("column", listAllMatches=True)

    where_expr = Forward()
    runs_kw = Keyword("number", caseless=True)
    and_kw = Keyword("and", caseless=True)
    or_kw = Keyword("or", caseless=True)
    in_kw = Keyword("in", caseless=True)
    size_kw = Keyword("size", caseless=True)
    identifier = Word(alphas + "_", alphanums + "_")

    exponent = CaselessLiteral("E")
    bin_op = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    set_op = oneOf("union intersect except", caseless=True)
    sign = Word("+-", exact=1)
    real_num = Combine(
        Optional(sign)
        + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums)))
        + Optional(exponent + Optional(sign) + Word(nums))
    )

    # sample(number=<runs>, size=[<lb>,<sample>])
    sample_clause = (
        sample_kw
        + "("
        + runs_kw
        + "="
        + Word(nums).setResultsName("runs")
        + ","
        + size_kw
        + "="
        + "["
        + Word(nums).setResultsName("lb")
        + ","
        + Word(nums).setResultsName("sample")
        + "]"
        + ")"
    )
    # subgraph(<startnode>,<depth>)
    subgraph_clause = (
        subgraph_kw.setResultsName("type")
        + "("
        + Word(nums).setResultsName("startnode")
        + ","
        + Word(nums).setResultsName("depth")
        + ")"
    )

    int_num = Combine(
        Optional(sign)
        + Word(nums)
        + Optional(exponent + Optional("+") + Word(nums))
    )
    column_rval = real_num | int_num | quotedString | column()

    # column <op> value   |   column in ( value, ... )
    where_condition = (
        (column() + bin_op + column_rval)
        | (
            column()
            + in_kw
            + "("
            + column_rval
            + ZeroOrMore("," + column_rval)
            + ")"
        )
    )
    # Same alternatives plus a parenthesised sub-expression, grouped.
    grouped_condition = Group(
        (column() + bin_op + column_rval)
        | (
            column()
            + in_kw
            + "("
            + column_rval
            + ZeroOrMore("," + column_rval)
            + ")"
        )
        | ("(" + where_expr + ")")
    )
    where_expr << where_condition + ZeroOrMore((and_kw | or_kw) + where_expr)

    # The three expressions below are built but are not part of the
    # grammar that gets returned.
    def_stmt = (
        ident.setResultsName("graph")
        + "."
        + target_kw.setResultsName("type")
        + "="
        + "{"
        + delimitedList(grouped_condition).setResultsName("compactwhere")
        + "}"
    )
    function_call = (
        identifier.setResultsName("func", listAllMatches=True)
        + "("
        + (
            delimitedList(identifier | Word(nums)).setResultsName(
                "args", listAllMatches=True)
        )
        + ")"
        | identifier.setResultsName("func", listAllMatches=True)
    )
    wf_stmt = Optional(delimitedList(function_call))

    # <select> <target> from <graph> [where <expr>]
    select_stmt << (
        select_kw
        + target_kw.setResultsName("type")
        + from_kw
        + ident.setResultsName("graph")
        + Optional(
            where_kw
            + where_expr.setResultsName("where", listAllMatches=True)
        )
    )
    # <select> sample(...) <target> from <graph> [where <expr>]
    sample_select_stmt << (
        select_kw
        + sample_clause
        + target_kw.setResultsName("type")
        + from_kw
        + ident.setResultsName("graph")
        + Optional(
            where_kw
            + where_expr.setResultsName("where", listAllMatches=True)
        )
    )
    # <select> subgraph(...) from <graph>
    subgraph_select_stmt << (
        select_kw + subgraph_clause + from_kw + ident.setResultsName("graph")
    )
    compound_select_stmt << (
        select_stmt.setResultsName("select", listAllMatches=True)
        + ZeroOrMore(
            set_op.setResultsName("setop", listAllMatches=True) + select_stmt
        )
    )
    set_stmt << (
        ident.setResultsName("setname", listAllMatches=True)
        + ZeroOrMore(
            set_op.setResultsName("setp", listAllMatches=True)
            + ident.setResultsName("setname")
        )
    )

    grammar = (
        sample_select_stmt
        | compound_select_stmt
        | subgraph_select_stmt
        | set_stmt
    )
    # Skip "--" comments that run to the end of the line.
    grammar.ignore("--" + restOfLine)
    return grammar
def __init__(self, query):
    """Set up the query tree and the grammar used to split log lines.

    The lower-cased ``query`` is parsed once and its first result kept in
    ``self._query_parser``; when ``query`` is empty or ``None`` that
    attribute is ``False``. ``self._log_parser`` is always created.
    """
    # Evaluator lookup, keyed by the results names assigned below.
    self._methods = {
        'and': self.evaluate_and,
        'or': self.evaluate_or,
        'not': self.evaluate_not,
        'parenthesis': self.evaluate_parenthesis,
        'quotes': self.evaluate_quotes,
        'word': self.evaluate_word,
    }
    self.line = ''
    self.query = query.lower() if query else ''

    if not self.query:
        self._query_parser = False
    else:
        # TODO: Cleanup
        search_or = Forward()
        search_word = Group(Word(alphanums)).setResultsName('word')

        # Words between double quotes: one or more, right-recursive.
        quoted_words = Forward()
        quoted_words << ((search_word + quoted_words) | search_word)

        quote_group = Group(Suppress('"') + quoted_words + Suppress('"'))
        search_quotes = quote_group.setResultsName('quotes') | search_word

        paren_group = Group(Suppress('(') + search_or + Suppress(")"))
        search_paren = (
            paren_group.setResultsName('parenthesis') | search_quotes)

        # The negation keyword is 'no'.
        search_not = Forward()
        not_group = Group(
            Suppress(Keyword('no', caseless=True)) + search_not)
        search_not << (not_group.setResultsName('not') | search_paren)

        # Explicit 'and', or adjacent terms with no operator between them.
        search_and = Forward()
        keyword_and = Group(
            search_not
            + Suppress(Keyword('and', caseless=True))
            + search_and)
        adjacent_and = Group(
            search_not + OneOrMore(~oneOf('and or') + search_and))
        search_and << (
            keyword_and.setResultsName('and')
            | adjacent_and.setResultsName('and')
            | search_not
        )

        or_group = Group(
            search_and + Suppress(Keyword('or', caseless=True)) + search_or)
        search_or << (or_group.setResultsName('or') | search_and)

        self._query_parser = search_or.parseString(self.query)[0]

    # Log line layout: timestamp, level, plugin, task, message.
    padded = Word(nums).setParseAction(lambda t: t[0].zfill(2))
    timestamp = Combine(
        (padded + '-' + padded + '-' + padded)
        + ' ' + padded + ':' + padded)
    field = Word(printables)

    # Either 16+ whitespace characters, reduced to an empty task, or
    # whitespace plus one word.
    empty_task = White(min=16).setParseAction(
        lambda s, l, t: [t[0].strip()]).setResultsName('task')
    word_task = White(min=1).suppress() & field.setResultsName('task')

    self._log_parser = (
        timestamp.setResultsName('timestamp')
        + field.setResultsName('log_level')
        + field.setResultsName('plugin')
        + (empty_task | word_task)
        + restOfLine.setResultsName('message')
    )
def parser_bnf():
    """Grammar for parsing podcast configuration files."""

    def _to_int(s, l, t):
        # Turn a run of digits into an int.
        return int(t[0])

    def _to_millis(s, l, t):
        # Keep at most three digits, zero-padded on the right: "5" -> 500.
        return int(t[0][:3].ljust(3, "0"))

    # Punctuation; everything except the caret is dropped from results.
    at_sign = Literal("@").suppress()
    caret = Literal("^")
    colon = Literal(":").suppress()
    open_bracket = Literal("[").suppress()
    dot = Literal(".").suppress()
    close_bracket = Literal("]").suppress()

    # zero_index ::= [0-9]+
    zero_index = Word(nums).setParseAction(_to_int)

    # filename ::= [A-Za-z0-9] [-A-Za-z0-9._/ ]*
    leading_char = Word(alphanums, exact=1)
    trailing_chars = Word(alphanums + "-_/. ")
    filename = Combine(leading_char + Optional(trailing_chars))

    # millisecs ::= [0-9]+   (the "." is matched where millisecs is used)
    millisecs = Word(nums).setParseAction(_to_millis).setResultsName("ms")

    # hours, minutes, seconds ::= zero_index
    hours = zero_index.setResultsName("hh")
    minutes = zero_index.setResultsName("mm")
    seconds = zero_index.setResultsName("ss")
    hours_minutes = (hours + colon + minutes + colon) | (minutes + colon)
    secs_millisecs = (seconds + Optional(dot + millisecs)) | (dot + millisecs)

    # timestamp ::= [[hours ":"] minutes ":"] seconds ["." millisecs]
    timestamp = Optional(hours_minutes) + secs_millisecs

    def _prepend_zero_timestamp(s, l, t):
        # A duration file given on its own gets a zero timestamp in front.
        return [timestamp.parseString("00:00:00.000"), t]

    # duration_file ::= "@", filename
    # The lonely variant is a separate expression so that its parse action
    # is not attached to every use of duration_file.
    duration_file = at_sign + filename.setResultsName("filename")
    lonely_duration_file = at_sign + filename.setResultsName("filename")
    lonely_duration_file.setParseAction(_prepend_zero_timestamp)

    # timespecs ::= timestamp [duration_file | {timestamp}]
    timespecs = Or([
        lonely_duration_file,
        Group(timestamp) + duration_file,
        OneOrMore(Group(timestamp.setParseAction(default_timestamp_fields))),
    ])

    # last_frame ::= "-1" | "last"
    last_frame = oneOf(["-1", "last"]).setParseAction(replaceWith(-1))

    # frame_number ::= ":" (zero_index | last_frame)
    frame_number = colon - (zero_index | last_frame).setResultsName("num")

    # stream_number ::= ":" zero_index
    stream_number = colon - zero_index.setResultsName("num")

    # input_file ::= ":" [filename]
    input_file = colon - Optional(filename).setResultsName("filename")

    # previous_segment ::= ":" "^"
    previous_segment = colon - caret.setResultsName("filename")

    # frame_input_file ::= input_file | previous_segment
    frame_input_file = Or([input_file, previous_segment])

    # av_trailer ::= input_file [stream_number]
    av_trailer = input_file + Optional(stream_number)

    # frame_type ::= "frame" | "f"
    frame_type = oneOf(["f", "frame"]).setParseAction(replaceWith("frame"))

    # frame_input ::= frame_type [frame_input_file [frame_number]]
    frame_input = (
        frame_type.setResultsName("type")
        + Optional(frame_input_file + Optional(frame_number))
    )

    # video_type ::= "video" | "v"
    video_type = oneOf(["v", "video"]).setParseAction(replaceWith("video"))

    # audio_type ::= "audio" | "a"
    audio_type = oneOf(["a", "audio"]).setParseAction(replaceWith("audio"))

    # av_input ::= (audio_type | video_type) [av_trailer]
    av_input = (
        (audio_type | video_type).setResultsName("type")
        + Optional(av_trailer)
    )

    # inputspec ::= "[" (av_input | frame_input) "]"
    input_list = delimitedList(av_input | frame_input, delim=":")
    inputspec = (
        open_bracket
        + input_list.setParseAction(default_input_fields)
        - close_bracket
    )

    # segmentspec ::= inputspec [timespecs]
    times = Group(Optional(timespecs)).setResultsName("times")
    segmentspec = Group(inputspec + times)

    # config ::= {segmentspec}
    config = ZeroOrMore(segmentspec)
    config.ignore(pythonStyleComment)
    return config