def parse_rest_case_data(rest_case_data):
    """Parse the free-text remainder of a case record into a dict of fields."""
    data = {}
    # Matches e.g. "(ABC Listing)" and captures the listing code.
    listing = p.Suppress(p.Literal("(")) + p.Word(
        p.alphanums).setResultsName("listing") + p.Suppress(
            p.Literal("Listing)"))
    # One "key: value" pair; the value runs until 2+ whitespace characters
    # or the end of the input.
    additional_info = p.And([
        p.SkipTo(p.Literal(": ")),
        p.Suppress(p.Literal(": ")),
        p.SkipTo(p.White(min=2) | p.StringEnd()),
    ])
    # Overall layout: address, "(X Listing)", key/value block up to the next
    # line starting with a number, then everything else.
    rest_case_data_detail = p.And([
        p.SkipTo(listing).setResultsName("address"),
        listing,
        p.SkipTo(p.LineStart() + p.Word(p.nums)).setResultsName("additional_info"),
        p.SkipTo(p.StringEnd()).setResultsName("rest_case_data")
    ])
    for key, value in rest_case_data_detail.parseString(
            rest_case_data).asDict().items():
        if key == "address":
            data['address'] = value[0].strip()
        elif key == "additional_info":
            # NOTE(review): this rebinds the `additional_info` grammar name
            # to a ParseResults; harmless because the grammar is only needed
            # once per call, but fragile if the loop order ever changes.
            additional_info = p.ZeroOrMore(
                p.Group(additional_info)).parseString(value[0])
            data.update(dict(additional_info.asList()))
        else:
            data[key] = value.strip()
    return data
class CronPlugin(interface.SyslogPlugin):
  """A syslog plugin for parsing cron messages."""

  NAME = u'cron'
  DESCRIPTION = u'Parser for syslog cron messages.'

  # Only syslog lines whose reporter field is CRON reach this plugin.
  REPORTER = u'CRON'

  _PYPARSING_COMPONENTS = {
      # The command text runs up to the ')' that ends the message body.
      u'command': pyparsing.Combine(
          pyparsing.SkipTo(pyparsing.Literal(u')') +
                           pyparsing.StringEnd())).setResultsName(
                               u'command'),
      u'username':
          pyparsing.Word(pyparsing.alphanums).setResultsName(u'username'),
  }

  # Matches message bodies of the form: (username) CMD (command)
  _TASK_RUN_GRAMMAR = (
      pyparsing.Literal(u'(') +
      _PYPARSING_COMPONENTS[u'username'] +
      pyparsing.Literal(u')') +
      pyparsing.Literal(u'CMD') +
      pyparsing.Literal(u'(') +
      _PYPARSING_COMPONENTS[u'command'] +
      pyparsing.Literal(u')') +
      pyparsing.StringEnd())

  MESSAGE_GRAMMARS = [(u'task_run', _TASK_RUN_GRAMMAR)]

  def ParseMessage(self, parser_mediator, key, timestamp, tokens):
    """Parses a syslog body that matched one of defined grammars.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the matching grammar.
      timestamp (int): the timestamp, which contains the number of micro
          seconds since January 1, 1970, 00:00:00 UTC or 0 on error.
      tokens (dict[str, str]): tokens derived from a syslog message based on
          the defined grammar.

    Raises:
      AttributeError: If an unknown key is provided.
    """
    # TODO: change AttributeError into ValueError or equiv.
    if key != u'task_run':
      raise AttributeError(u'Unknown grammar key: {0:s}'.format(key))

    event_data = CronTaskRunEventData()
    event_data.body = tokens.get(u'body', None)
    event_data.command = tokens.get(u'command', None)
    event_data.hostname = tokens.get(u'hostname', None)
    # TODO: pass line number to offset or remove.
    event_data.offset = 0
    event_data.pid = tokens.get(u'pid', None)
    event_data.reporter = tokens.get(u'reporter', None)
    event_data.severity = tokens.get(u'severity', None)
    event_data.username = tokens.get(u'username', None)

    event = time_events.TimestampEvent(
        timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)
class CronSyslogPlugin(interface.SyslogPlugin):
  """A syslog plugin for parsing cron messages."""

  NAME = 'cron'
  DATA_FORMAT = 'Cron syslog line'

  # Only syslog lines whose reporter field is CRON reach this plugin.
  REPORTER = 'CRON'

  _PYPARSING_COMPONENTS = {
      # The command text runs up to the ')' that ends the message body.
      'command': pyparsing.Combine(
          pyparsing.SkipTo(pyparsing.Literal(')') +
                           pyparsing.StringEnd())).setResultsName('command'),
      'username':
          pyparsing.Word(pyparsing.alphanums).setResultsName('username'),
  }

  # Matches message bodies of the form: (username) CMD (command)
  _TASK_RUN_GRAMMAR = (
      pyparsing.Literal('(') +
      _PYPARSING_COMPONENTS['username'] +
      pyparsing.Literal(')') +
      pyparsing.Literal('CMD') +
      pyparsing.Literal('(') +
      _PYPARSING_COMPONENTS['command'] +
      pyparsing.Literal(')') +
      pyparsing.StringEnd())

  MESSAGE_GRAMMARS = [('task_run', _TASK_RUN_GRAMMAR)]

  def ParseMessage(self, parser_mediator, key, date_time, tokens):
    """Parses a syslog body that matched one of defined grammars.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the matching grammar.
      date_time (dfdatetime.DateTimeValues): date and time values.
      tokens (dict[str, str]): tokens derived from a syslog message based on
          the defined grammar.

    Raises:
      ValueError: If an unknown key is provided.
    """
    if key != 'task_run':
      raise ValueError('Unknown grammar key: {0:s}'.format(key))

    event_data = CronTaskRunEventData()
    event_data.body = tokens.get('body', None)
    event_data.command = tokens.get('command', None)
    event_data.hostname = tokens.get('hostname', None)
    # TODO: pass line number to offset or remove.
    event_data.offset = 0
    event_data.pid = tokens.get('pid', None)
    event_data.reporter = tokens.get('reporter', None)
    event_data.severity = tokens.get('severity', None)
    event_data.username = tokens.get('username', None)

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)
def enumeratedItems(baseExpr=None, form='[1]', **min_max):
    """Parser for enumerated items.

    Examples:
        [1] abc
        [2] def
        ==> ['abc', 'def']

    Args:
        baseExpr: optional expression for the item body; when None, the body
            is everything up to the next marker or the end of input.
        form: marker template; a '1' in it is replaced by a running number.
        **min_max: either `exact=N`, or `min=`/`max=` repetition bounds.

    Returns:
        A pyparsing expression producing one group per enumerated item.
    """
    if form is None:
        form = '[1]'
    if '1' in form:
        # Numbered marker, e.g. '[1]' -> matches '[<digits>]'.
        no = pp.Regex(re.escape(form).replace(
            '1', '(?P<no>\\d+)'))  # .setParseAction(lambda x:x.no)
    else:
        # Fixed marker with no running number.
        no = pp.Regex(re.escape(form))
    # no.suppress()
    if 'exact' in min_max and min_max['exact'] > 0:
        # BUG FIX: the original read the bare name `exact`, which is
        # undefined here and raised NameError whenever an exact count was
        # requested; the value lives in the keyword-argument dict.
        max_ = min_ = min_max['exact']
    else:
        min_ = min_max.get('min', 0)
        max_ = min_max.get('max', None)
    if baseExpr is None:
        # Item body: everything up to the next marker (or end of input).
        return (pp.Group(no + pp.SkipTo(pp.StringEnd() | no).setParseAction(
            _strip()))) * (min_, max_)
    else:
        return (pp.Group(no + baseExpr.setParseAction(_strip()))) * (min_, max_)
def parse_type_def_str(typedef_str: str) -> List[model.types.ModelVar]:
    """Parse a function type annotation.

    Args:
        typedef_str: annotation text such as "a -> (b -> c) -> d".

    Returns:
        A list of model type variables, one per arrow-separated component;
        parenthesised sub-signatures become FuncVar instances.
    """
    lpar = prs.Literal('(').suppress()
    rpar = prs.Literal(')').suppress()
    arr = prs.Literal('->').suppress()
    term = prs.Word(prs.alphas)

    # A type is a bare term or a parenthesised (grouped) sub-signature.
    func_def = prs.Forward()
    typ = term | prs.Group(lpar + func_def + rpar)
    func_def << typ + prs.ZeroOrMore(arr + typ)
    func_def += prs.StringEnd()

    res = func_def.parseString(typedef_str).asList()

    def unroll(lst):
        # Convert parsed tokens to model vars, recursing into sub-signatures.
        for t in lst:
            if isinstance(t, str):
                yield type_str_to_model(t)
            elif isinstance(t, list):
                func = model.types.FuncVar()
                # IDIOM FIX: plain loop instead of a list comprehension that
                # was executed only for its side effects.
                for arg in unroll(t):
                    func.add_arg(arg)
                yield func
            else:
                # ROBUSTNESS FIX: raise explicitly instead of `assert False`,
                # which is stripped when Python runs with -O.
                raise AssertionError("Unreachable code")

    return list(unroll(res))
def compile():
    """Build a JSON parser.

    Returns:
        A callable mapping a JSON document string to the parsed Python value
        (dict / list / str / float / bool / None).

    NOTE: the function name shadows the builtin `compile`; kept because it is
    the block's public interface.
    """
    LBRACE, RBRACE, LBRACK, RBRACK, COLON = map(pp.Suppress, '{}[]:')

    value = pp.Forward()

    true = pp.Keyword('true').setParseAction(pp.replaceWith(True))
    false = pp.Keyword('false').setParseAction(pp.replaceWith(False))
    null = pp.Keyword('null').setParseAction(pp.replaceWith(None))

    # All JSON numbers are parsed as float.
    number = (pp.Regex(
        r'-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?').setParseAction(
            pp.tokenMap(float)))
    string = (pp.Regex(
        r'"([ !#-\[\]-\U0010ffff]+'
        r'|\\(?:["\\/bfnrt]|u[0-9A-Fa-f]{4}))*"').setParseAction(
            pp.tokenMap(json_unescape)))

    items = pp.delimitedList(value)
    # `-` stops backtracking after the opening bracket, improving errors.
    array = (pp.Group(LBRACK - pp.Optional(items) +
                      RBRACK).setParseAction(lambda t: t.asList()))

    member = pp.Group(string + COLON + value)
    members = pp.delimitedList(member)
    # IDIOM FIX: renamed from `object`, which shadowed the builtin.
    obj = (pp.Dict(LBRACE - pp.Optional(members) +
                   RBRACE).setParseAction(lambda t: t.asDict()))

    value << (obj | array | string | number | true | false | null)

    json = value('top') + pp.StringEnd()
    # NOTE(review): setDefaultWhitespaceChars is a class-level setting in
    # pyparsing; calling it through an instance changes the global default
    # for elements created afterwards, not this parser's whitespace. Kept
    # as-is for behavior compatibility -- confirm the original intent.
    json.setDefaultWhitespaceChars(' \t\n\r')
    json.parseWithTabs()
    return lambda s: json.parseString(s)['top']
class Tokens(object):
    """Shared pyparsing tokens for parsing config values and tag lists."""

    # shared tokens
    delim_chars = '[]{},'
    # A typed value must be followed by a delimiter or end of input, so that
    # e.g. "1abc" is not half-consumed as the number 1.
    pause = pp.FollowedBy(pp.Word(delim_chars) | pp.StringEnd())
    number = (pp.pyparsing_common.number + pause)
    quoted_string = pp.QuotedString('"', escChar='\\')
    true = (pp.Regex(r'(True|true|yes|on)') + pause). \
        setParseAction(lambda _: True)
    false = (pp.Regex(r'(False|false|no|off)') + pause). \
        setParseAction(lambda _: False)
    # Catch-all: any run of non-delimiter characters, stripped.
    unquoted_string = pp.CharsNotIn(delim_chars). \
        setParseAction(lambda toks: toks[0].strip())
    # Fallback producing an empty string for an absent value.
    empty_value = pp.Empty(). \
        setParseAction(lambda _: '')

    # tokens for configs
    identifier = pp.pyparsing_common.identifier.copy()
    comma = pp.Literal(',').suppress()
    assign = pp.Literal('=').suppress()
    # NOTE: order matters -- typed values are tried before the
    # unquoted-string catch-all and the empty fallback.
    config_value = (
        number | true | false | quoted_string | unquoted_string | empty_value)
    key_value_pair = (identifier + assign + config_value). \
        setParseAction(lambda toks: (toks[0], toks[1]))
    key_value_pair_list = pp.Optional(
        key_value_pair + pp.ZeroOrMore(comma + key_value_pair))

    # tokens for tags
    tag = quoted_string | unquoted_string | pp.Empty().suppress()
    tag_list = pp.Optional(tag + pp.ZeroOrMore(comma + tag))
def _parse_regexes(regexes):
    """Parses regexes in the form of '/from/to/options'.

    Note: this doesn't handle regex patterns

    Args:
        regexes: a list of regex strings

    Returns:
        a list of (regex pattern, replacement string)
    """
    slash = pp.Literal('/').suppress()
    # Pattern / replacement bodies: any printable text except '/'.
    body = pp.Word(pp.printables + ' \t\n\r', excludeChars='/')
    # Optional trailing flag characters after a third slash.
    flag = (pp.Literal('i') ^ pp.Literal('L') ^ pp.Literal('s') ^
            pp.Literal('u') ^ pp.Literal('x'))
    grammar = (slash + body + slash + body +
               pp.Optional(slash + pp.ZeroOrMore(flag)) + pp.StringEnd())

    compiled = []
    for raw in regexes:
        try:
            tokens = grammar.parseString(raw)
        except pp.ParseException as err:
            print('Unable to parse regex {0}'.format(raw))
            raise err
        pattern = tokens[0]
        replacement = tokens[1]
        if len(tokens) > 2:
            # Fold any flags into an inline (?...) group.
            pattern = '(?{0}){1}'.format(''.join(tokens[2:]), pattern)
        print('Replacing {0} with {1}'.format(pattern, replacement))
        compiled.append((re.compile(pattern), replacement))
    return compiled
def get_guild_role_parser(self, guild):
    """
    Create a role parser for the specified guild.

    :param guild: guild whose roles the parse actions resolve against.
    :return: the complete role-expression grammar.
    """
    # Set-algebra keywords (case insensitive).
    intersect = pp.CaselessKeyword(self.INTERSECT_TOKEN)
    union = pp.CaselessKeyword(self.UNION_TOKEN)
    complement = pp.CaselessKeyword(self.COMPLEMENT_TOKEN)
    left_paren = pp.Literal(self.LEFT_PAREN_TOKEN)
    right_paren = pp.Literal(self.RIGHT_PAREN_TOKEN)
    # A role is a bare alphanumeric word or a single-quoted name.
    role = pp.Word(pp.alphanums) | pp.QuotedString("'", escChar="\\")
    # Standard recursive-descent precedence chain:
    # expression -> terms joined by union; term -> factors joined by
    # intersect; factor -> parenthesised expression, complement, or role.
    expression = pp.Forward()
    term = pp.Forward()
    factor = pp.Forward()
    factor <<= left_paren + expression + pp.FollowedBy(
        right_paren) + right_paren | complement + factor | role
    term <<= factor + intersect + term | factor
    expression <<= term + union + expression | term
    factor.setParseAction(self.get_guild_factor_action(guild))
    term.setParseAction(self.get_guild_term_action())
    expression.setParseAction(self.expression_action)
    # The whole input must be a single expression.
    role_statement = pp.StringStart() + expression + pp.StringEnd()
    return role_statement
class CronPlugin(interface.SyslogPlugin):
  """A syslog plugin for parsing cron messages."""

  NAME = u'cron'
  DESCRIPTION = u'Parser for syslog cron messages.'

  # Only syslog lines whose reporter field is CRON reach this plugin.
  REPORTER = u'CRON'

  _PYPARSING_COMPONENTS = {
      # The command text runs up to the ')' that ends the message body.
      u'command': pyparsing.Combine(
          pyparsing.SkipTo(pyparsing.Literal(u')') +
                           pyparsing.StringEnd())).setResultsName(
                               u'command'),
      u'username':
          pyparsing.Word(pyparsing.alphanums).setResultsName(u'username'),
  }

  # Matches message bodies of the form: (username) CMD (command)
  _TASK_RUN_GRAMMAR = (
      pyparsing.Literal(u'(') +
      _PYPARSING_COMPONENTS[u'username'] +
      pyparsing.Literal(u')') +
      pyparsing.Literal(u'CMD') +
      pyparsing.Literal(u'(') +
      _PYPARSING_COMPONENTS[u'command'] +
      pyparsing.Literal(u')') +
      pyparsing.StringEnd())

  MESSAGE_GRAMMARS = [(u'task_run', _TASK_RUN_GRAMMAR)]

  def ParseMessage(self, parser_mediator, key, timestamp, tokens):
    """Parses a syslog body that matched one of defined grammars.

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      key: a string indicating the name of the matching grammar.
      timestamp: the timestamp, which is an integer containing the number
                 of micro seconds since January 1, 1970, 00:00:00 UTC or 0
                 on error.
      tokens: a dictionary containing the results of the syslog grammar, and
              the cron grammar.

    Raises:
      AttributeError: If an unknown key is provided.
    """
    if key == u'task_run':
      parser_mediator.ProduceEvent(CronTaskRunEvent(
          timestamp, 0, tokens))
    else:
      raise AttributeError(u'Unknown grammar key: {0:s}'.format(key))
def create_parser(slist, current_section):
    """Build the grammar: optional leading text followed by repeated
    (section marker, text) pairs, consuming the entire input."""
    delim = pp.Literal(':')
    # A section marker looks like ":Section Name:".
    section_marker = pp.Combine(
        delim + pp.Word(pp.alphas, pp.alphanums + '_ ') + delim)
    section_marker.setParseAction(set_section(current_section))
    section_marker.setName('section')

    # Free text runs up to the next section marker or end of input.
    body = pp.SkipTo(section_marker | pp.StringEnd())
    body.setParseAction(to_list(slist, current_section))
    body.setName('text')

    return (pp.StringStart()
            + pp.Optional(body)
            + pp.ZeroOrMore(section_marker + body)
            + pp.StringEnd())
def parse_spectre(netlist_string):
    """Parse a Spectre netlist string and return the pyparsing result tree."""
    # newlines are part of the grammar, thus redefine the whitespaces
    # without it
    ws = ' \t'
    _p.ParserElement.setDefaultWhitespaceChars(ws)

    # spectre netlist grammar definition
    EOL = _p.LineEnd().suppress()  # end of line
    linebreak = _p.Suppress(
        "\\" + _p.LineEnd())  # breaking a line with backslash newline
    identifier = _p.Word(_p.alphanums + '_!<>-+')  # a name for...
    number = _p.Word(_p.nums + ".")  # a number
    net = identifier  # a net
    nets = _p.Group(_p.OneOrMore(net('net') | linebreak))  # many nets
    cktname = identifier  # name of a subcircuit
    cktname_end = _p.Keyword("ends").suppress()
    comment = _p.Suppress("//" + _p.SkipTo(_p.LineEnd()))
    expression = _p.Word(_p.alphanums + '._*+-/()')

    # Instance parameters: name=value pairs, possibly split over lines.
    inst_param_key = identifier + _p.Suppress("=")
    inst_param_value = expression('expression')
    inst_parameter = _p.Group(
        inst_param_key('name') +
        inst_param_value('value')).setResultsName('key')
    parameters = _p.Group(
        _p.ZeroOrMore(inst_parameter | linebreak)).setResultsName('parameters')

    # An instance line: <name> ( <nets> ) <reference> <parameters> <EOL>
    instref = identifier
    instname = identifier
    instance = _p.Group(
        instname('name') + _p.Suppress('(') + nets('nets') + _p.Suppress(')') +
        instref('reference') + parameters + EOL).setResultsName('instance')

    subcircuit_content = _p.Group(
        _p.ZeroOrMore(instance | EOL | comment)).setResultsName('subnetlist')
    subcircuit = _p.Group(
        # matches subckt <name> <nets> <newline>
        _p.Keyword("subckt").suppress() + cktname('name') + nets('nets') + EOL
        # matches the content of the subcircuit
        + subcircuit_content
        # matches ends <name> <newline>
        + cktname_end + _p.matchPreviousExpr(cktname).suppress() +
        EOL).setResultsName('subcircuit')
    topcircuit = _p.Group(
        # matches topckt <name> <nets> <newline>
        _p.Keyword("topckt").suppress() + cktname('name') + nets('nets') + EOL
        # matches the content of the subcircuit
        + subcircuit_content
        # matches ends <name> <newline>
        + cktname_end + _p.matchPreviousExpr(cktname).suppress() +
        EOL).setResultsName('topcircuit')

    netlist_element = subcircuit | topcircuit \
        | EOL | comment('comment')
    netlist = _p.ZeroOrMore(netlist_element) + _p.StringEnd()

    # Attach the semantic actions that build the circuit objects.
    parameters.setParseAction(handle_parameters)
    instance.setParseAction(handle_instance)
    subcircuit.setParseAction(handle_subcircuit)
    topcircuit.setParseAction(handle_topcircuit)

    return netlist.parseString(netlist_string)
def _parser(cls):
    """Assemble the document grammar: one or more lines through end of input,
    with blank lines and comments ignored."""
    # Newlines are significant, so they are excluded from the default
    # whitespace set and handled by the line grammar itself.
    pp.ParserElement.setDefaultWhitespaceChars(cls.DEFAULT_WHITESPACE)

    grammar = pp.OneOrMore(cls._line()) + pp.StringEnd()
    grammar.ignore(cls.BLANKLINE)
    grammar.ignore(cls.COMMENT)
    return grammar
def __init__(self, *args, **kwargs):
    """Set up the shared display-field list and the grammar that parses
    display-field specifications."""
    super(DisplayPreprocessor, self).__init__(*args, **kwargs)
    self.display_fields = list()
    if self.markdown:
        # Share the collected fields with the markdown instance.
        self.markdown.display_fields = self.display_fields
    # Grammar: zero or more "[pk:]field_name" entries consuming the whole
    # string; when the pk prefix is absent, the current article's pk is used.
    self.ffields = pp.ZeroOrMore(
        (pp.Optional(pp.Word(pp.nums) + pp.Literal(":").suppress(),
                     default=None) +
         utils.fident).setParseAction(lambda strg, loc, st: dict(
             article_pk=self.markdown.article.pk if st[0] is None else st[0],
             name=st[1],
         ))) + pp.StringEnd()
def _get_parser():
    """Build the grammar for "variable" and "variable if a == b" expressions.

    Each parse action replaces its token with a (tag, value) tuple so later
    evaluation can dispatch on the tag.
    """

    def _object(string, location, tokens):
        # Tag a bare word as an object reference.
        token = tokens[0]
        tokens[0] = (_OBJ, token)

    def _integer(string, location, tokens):
        # Tag as object; fall back to the raw text when not a valid int.
        try:
            token = int(tokens[0])
        except ValueError:
            token = tokens[0]
        tokens[0] = (_OBJ, token)

    def _number(string, location, tokens):
        # Tag as object; fall back to the raw text when not a valid float.
        try:
            token = float(tokens[0])
        except ValueError:
            token = tokens[0]
        tokens[0] = (_OBJ, token)

    def _test(string, location, tokens):
        token = tokens[0]
        tokens[0] = (_TEST, token)

    def _if(string, location, tokens):
        token = tokens[0]
        tokens[0] = (_IF, token)

    def _expr_var(string, location, tokens):
        token = tokens[0]
        tokens[0] = (_VALUE, token)

    def _expr_test(string, location, tokens):
        token = tokens[0]
        tokens[0] = (_TEST, token)

    white_space = pp.White().suppress()
    end = pp.StringEnd()
    operator = (pp.Literal(_EQUAL) |
                pp.Literal(_NOT_EQUAL)).setParseAction(_test)
    begin_if = pp.CaselessLiteral(_IF, ).setParseAction(_if)
    obj = pp.Word(pp.printables).setParseAction(_object)
    # NOTE(review): `integer` accepts any digit/'-' run, so it also matches
    # the integral prefix of a float before `number` gets a chance -- confirm
    # the ordering of alternatives in `item` is intended.
    integer = pp.Word('0123456789-').setParseAction(_integer)
    number = pp.Word('0123456789-.').setParseAction(_number)
    item = integer | number | obj

    # Bare variable: a single object followed by end of input.
    expr_var = pp.Group(obj + pp.Optional(white_space) +
                        end).setParseAction(_expr_var)
    # Conditional: "<obj> if <item> <op> <item>".
    expr_test = pp.Group(obj + white_space + begin_if + white_space + item +
                         white_space + operator + white_space +
                         item).setParseAction(_expr_test)
    expr = pp.Optional(white_space) + (expr_test | expr_var)
    return expr
def get_simple_ref_parser(escape_character, reference_sentinels,
                          export_sentinels):
    """Grammar for exactly one reference with optional surrounding text."""
    ref_open_char, ref_close_char = reference_sentinels
    inv_open_char, inv_close_char = export_sentinels
    # Plain text may not contain the escape character or any sentinel.
    excludes = (escape_character + ref_open_char + ref_close_char +
                inv_open_char + inv_close_char)

    text = pp.CharsNotIn(excludes).setParseAction(_string)
    reference = (pp.Literal(ref_open_char).suppress()
                 + pp.Group(text)
                 + pp.Literal(ref_close_char).suppress()
                 ).setParseAction(_reference)

    return (pp.StringStart() + pp.Optional(text) + reference +
            pp.Optional(text) + pp.StringEnd())
def _construct_fnsuffix_parser():
    """Build the parser for a function-suffix "{key=value, ...}" dict."""
    # Any run of characters that cannot delimit or structure a value.
    atom = pp.Regex(r"[^=,{}()[\]]+")
    value = pp.Forward().setName("value")  # .setDebug()
    key = pp.Regex(r"\w*").setName("key")  # .setDebug()
    item = pp.Dict(pp.Group(key + pp.Suppress("=") + value))
    items = pp.delimitedList(item)
    dict_ = pp.Suppress("{") + items + pp.Suppress("}")
    # Bracketed containers ([...], (...), {...}) keep their literal source
    # text via combine=True / pp.Combine below.
    list_, tuple_, set_ = (o + pp.delimitedList(value, combine=True) + c
                           for o, c in zip(["[", "(", "{"], ["]", ")", "}"]))
    combine_values = [
        pp.Combine(expr) for expr in (list_, tuple_, set_, atom + value)
    ]
    value << (
        pp.quotedString | dict_ | pp.Or(combine_values) | atom
    )  # Caution: brackets are needed because of << operator precedence!!
    # The whole input must be a single dict.
    return dict_ + pp.StringEnd()
def _set_syntax(self):
    """Compose the top-level grammar and attach this parser's actions.

    Copies the module-level expressions so the parse actions are bound to
    this instance without mutating the shared grammar objects.
    """
    table_expr = table.copy()
    ref_expr = ref.copy()
    enum_expr = enum.copy()
    table_group_expr = table_group.copy()
    project_expr = project.copy()

    table_expr.addParseAction(self._parse_table)
    ref_expr.addParseAction(self._parse_ref_blueprint)
    enum_expr.addParseAction(self._parse_enum)
    table_group_expr.addParseAction(self._parse_table_group)
    project_expr.addParseAction(self._parse_project)

    expr = (table_expr | ref_expr | enum_expr | table_group_expr
            | project_expr)
    # `expr[...]` is pyparsing 3.x shorthand for ZeroOrMore(expr).
    self._syntax = expr[...] + ('\n' | comment)[...] + pp.StringEnd()
def parse_ce(trace_data, vs):
    """Parse a SAL counterexample trace into a Trace object over `vs`."""
    # lexer rules
    SEP = pp.Keyword('------------------------').suppress()
    STEP = pp.Keyword('Step').suppress()
    HDR_ = (pp.Keyword('Counterexample:') +
            pp.Keyword('========================') + pp.Keyword('Path') +
            pp.Keyword('========================')).suppress()
    # ignore the version information before the HDR_
    HDR = pp.SkipTo(HDR_, True).suppress()
    FOOTER = pp.Keyword('total execution time:') + floats + pp.Keyword('secs')
    FOOTER.setParseAction(''.join)
    EOF = pp.StringEnd()
    LABEL = pp.Keyword('label')

    # Grammar
    #LABEL = pp.Keyword('label').suppress()
    sva = pp.Keyword('--- System Variables (assignments) ---').suppress()
    # XXX: SAL's default monitor?
    #bapc = (pp.Keyword('ba-pc!1') + EQUAL + integer).suppress()
    step_hdr = STEP + integer + COLON
    assignment = ident + EQUAL + value
    assignment.setParseAction(Assignment)
    label = LABEL.suppress() + ident
    # Transition-information section delimited by separator lines; only the
    # label is kept.
    ti = SEP + pp.SkipTo(label, False) + label + pp.SkipTo(SEP, True)
    ti.setParseAction(extract_label)
    #step = step_hdr + sva + bapc + pp.OneOrMore(assignment) + pp.Optional(ti)
    step = step_hdr + sva + pp.OneOrMore(assignment) + pp.Optional(
        ti, default='')
    step.setParseAction(Step)
    #step.setParseAction(Step)
    trace = (HDR + pp.OneOrMore(step) + pp.Optional(FOOTER, default='')
             )  # ) +
    # pp.SkipTo(EOF, True))
    trace.setParseAction(functools.partial(Trace, vs))
    # parseAll ensures the entire counterexample text is consumed.
    parsed = trace.parseString(trace_data, parseAll=True)
    return parsed[0]
def _parseIfDefExpression(ifdefexp):
    """This function parses a given ifdef-expression and
    rewrites the expression according to the given __pt mapping.
    This one is used to make use of a csp solver without using
    a predicate."""
    # NOTE: Python 2 source (old-style except clause and print statements).
    mal = list()

    def _rewriteOne(param):
        """This function returns each one parameter function
        representation for csp."""
        op, ma = param[0]
        mal.append(ma)
        # NOTE(review): `ret` is only bound for '!' and 'defined'; any other
        # unary operator would raise UnboundLocalError here -- confirm those
        # are the only operators registered below.
        if op == '!':
            ret = op + '(' + ma + ')'
        if op == 'defined':
            ret = ma
        return ret

    def _rewriteTwo(param):
        """This function returns each two parameter function
        representation for csp."""
        # Operands are the even positions, the operator is at index 1.
        mal.extend(param[0][0::2])
        ret = param[0][1]
        ret = '(' + ret.join(map(str, param[0][0::2])) + ')'
        return ret

    operand = __string | __hexadec | __integer | \
        __function | __identifier
    operators = pypa.oneOf('&& ||')  # extend with further operators

    expr = pypa.operatorPrecedence(operand, [
        ('defined', 1, pypa.opAssoc.RIGHT, _rewriteOne),
        ('!', 1, pypa.opAssoc.RIGHT, _rewriteOne),
        (operators, 2, pypa.opAssoc.LEFT, _rewriteTwo),
    ]) + pypa.StringEnd()

    try:
        # NOTE(review): `rsig` is unused on the success path as visible
        # here -- possibly the rewritten value should be returned; confirm
        # against the full original file.
        rsig = expr.parseString(ifdefexp)[0]
    except pypa.ParseException, e:
        print 'ERROR (parse): cannot parse sig (%s) -- (%s)' % (
            ifdefexp,
            e.col,
        )
        return ifdefexp
def _collectIfdefExpressions(fname):
    '''
    This method filters all ifdef expressions out of a file and returns them
    as a list.
    '''
    # NOTE: Python 2 source (xreadlines).

    def _extractIfdefExpression(tokens):
        # Accumulate every matched expression in the module-level list.
        global __ifdefexplist
        __ifdefexplist += tokens

    # Matches lines such as "#if FOO && BAR" or "#ifdef BAZ".
    __macro = pypa.Literal('#') \
        + pypa.oneOf("if ifdef ifndef elif") \
        + pypa.OneOrMore(pypa.Word(pypa.alphanums + "&|><=^")) \
        .setParseAction(_extractIfdefExpression) \
        + pypa.StringEnd()

    with open(fname, 'r') as fd:
        for line in fd.xreadlines():
            try:
                print(__macro.parseString(line))
            except pypa.ParseException:
                # Non-macro lines simply don't match; skip them.
                pass

    return __ifdefexplist
def tsql_grammar():
    """Defines the expression for the complete TigerSearch query language.

    A query term is either a node operand, a node relation constraint or node
    predicate. An expression can be a single term or a conjunction of terms.
    Toplevel disjunction is not supported, because it can always be
    represented by negations in the relations and node descriptions.

    The returned expression must match the whole input string.

    :AST Node: `TsqlExpression`
    :Example: ``#a:[cat="NP"] & root(#a) and #a > [word="safe"]``
    """
    # A single query term.
    atom = (node_predicate() | node_relation_constraint() | NODE_OPERAND)
    # Either a "&"-joined conjunction of atoms, or a single atom.
    expr = pyparsing.Group(
        atom + pyparsing.OneOrMore(suppressed_literal(u"&") + atom)
    ).setParseAction(
        lambda s, l, t: ast.Conjunction(t.asList()[0])) | atom
    expr.setParseAction(single_value_holder(ast.TsqlExpression))
    # The grammar must consume the entire input string.
    return expr + pyparsing.StringEnd()
def parseBib(filename, language):
    """Parse a BibTeX file and write an RST bibliography for `language`."""
    # NOTE: Python 2 source (print >> file syntax).
    pp.ParserElement.setDefaultWhitespaceChars(" \n\t")
    # One entry: @type{tag, key = {value}, ...}
    entry = returnList(
        pp.Word('@', pp.alphanums) + sl('{') + pp.Word(pp.alphanums + "_") +
        sl(',') + CommaList(
            returnTuple(
                pp.Word(pp.alphanums) + sl('=') +
                pp.QuotedString('{', endQuoteChar='}'))) +
        pp.Suppress(pp.Optional(',')) + sl('}'))
    # Entries, or '#'-prefixed comment lines, through end of file.
    r = (pp.ZeroOrMore(entry)
         | pp.Suppress('#' + pp.ZeroOrMore(pp.CharsNotIn('\n'))) +
         pp.StringEnd()).parseFile(filename)

    bibliography = QOpen(os.path.join(language, "bibliography.rst"), 'wt')
    print >> bibliography, "Bibliography"
    print >> bibliography, "============"
    print >> bibliography

    # Render entries sorted by tag.
    for _, e in sorted([(str(x[1]), x) for x in r]):
        (etype, tag, attrs) = str(e[0][1:]), str(e[1]), dict([
            (str(a), str(b)) for (a, b) in e[2]
        ])
        # string.Template per BibTeX entry type; unknown types are skipped.
        representations = {
            'article':
                '$author, "$title". $journal $volume $number, pp $pages ($year)',
            'inproceedings': '$author "$title", $booktitle, $year',
            'misc': '$author "$title", $year',
            'techreport': '$author "$title", $edition, $edition ($year)',
        }
        if etype in representations:
            if 0:
                print >> bibliography, tag
                print >> bibliography, "^" * len(tag)
                print >> bibliography

            print >> bibliography, ".. [%s] %s" % (
                tag, Template(representations[etype]).safe_substitute(attrs))
            print >> bibliography
    bibliography.close()
def make_parser():
    """Generate the pyparsing parser for hand strings."""
    # Rank and suit alphabets, accepted in both cases.
    ranks_str = ''.join(ranks)
    ranks_str += ranks_str.lower()
    suits_str = ''.join(suits)
    suits_str += suits_str.lower()
    # Suitedness qualifier: 'o' or 's'.
    suitedness = pyparsing.Word("os", exact=1).setName("suitedness")
    # A concrete card: one rank character followed by one suit character.
    card = pyparsing.Word(ranks_str, suits_str, exact=2).setName("card")
    hand = card * 2
    hand.setParseAction(lambda s, loc, toks: ''.join(toks))
    digits = pyparsing.Word(pyparsing.nums)
    natural_number = pyparsing.Word('123456789', pyparsing.nums)
    # Decimal number in the usual forms: "12", ".5"/"0.5", "12.5", "12.".
    decimal = natural_number ^ \
        (pyparsing.Optional(pyparsing.Literal('0')) +
         pyparsing.Literal('.') + digits) ^ \
        (natural_number + pyparsing.Literal('.') + digits) ^ \
        (natural_number + pyparsing.Literal('.'))
    decimal.setParseAction(lambda s, loc, toks: ''.join(toks))
    weight = pyparsing.Group(decimal +
                             pyparsing.Optional(pyparsing.Literal('%')))
    # Hand type such as "AKs"; the negative lookahead keeps it from eating
    # the start of a weight ('%') or a weighted group ('(').
    handtype = pyparsing.Word(ranks_str, exact=2) + \
        pyparsing.Optional(suitedness) + \
        ~pyparsing.FollowedBy(pyparsing.Literal('%') ^ pyparsing.Literal('('))
    handtype.setParseAction(lambda s, loc, toks: ''.join(toks))
    # Named tag: "#name#".
    tag = pyparsing.Literal('#') + pyparsing.Word(pyparsing.alphanums + '_') \
        + pyparsing.Literal('#')
    # Single type, closed range ("QQ-TT"), open range ("88+"), literal hand,
    # or tag.
    handtype_group = pyparsing.Group(
        handtype ^ (handtype + pyparsing.Literal('-') + handtype) ^
        (handtype + pyparsing.Literal('+')) ^ hand ^ tag)
    hand_group_list = pyparsing.Group(pyparsing.delimitedList(handtype_group))
    # Optionally weighted group, e.g. "60%(AKs, QQ+)".
    weighted_hand_group_list = pyparsing.Group(
        (weight + pyparsing.Literal('(').suppress() + hand_group_list +
         pyparsing.Literal(')').suppress()) ^ hand_group_list)
    handrange = pyparsing.Optional(
        pyparsing.delimitedList(weighted_hand_group_list)) + \
        pyparsing.StringEnd()
    return handrange
class Identifiers:
    """
    This class static variables can be used when defining regex view.
    For ex:
    _PFENotificationStatsTable:
      title: PFE Notification statistics
      key: name
      view: _PFENotificationStatsView

    _PFENotificationStatsView:
      regex:
        value: numbers
        name: words
    """
    # One or more whitespace-separated printable tokens.
    printables = pp.OneOrMore(pp.Word(pp.printables))
    # Integer or decimal number, re-joined into a single string token.
    numbers = (pp.Word(pp.nums) + pp.Optional(pp.Literal('.') + pp.Word(
        pp.nums))).setParseAction(lambda i: ''.join(i))
    # NOTE(review): Each (&) of digit-words and hex-letter-words, i.e. both
    # parts in any order -- confirm this matches the intended hex format.
    hex_numbers = pp.OneOrMore(pp.Word(pp.nums, min=1)) & pp.OneOrMore(
        pp.Word('abcdefABCDEF', min=1))
    word = pp.Word(pp.alphanums) | pp.Word(pp.alphas)
    # Multiple words re-joined with single spaces.
    words = (pp.OneOrMore(word)).setParseAction(lambda i: ' '.join(i))
    percentage = pp.Word(pp.nums) + pp.Literal('%')
    # A separator row of '---' or '===' that runs to the end of the input.
    header_bar = (pp.OneOrMore(pp.Word('-')) | pp.OneOrMore(pp.Word('='))) + \
        pp.StringEnd()
def _get_parser():
    """Build the interpolation grammar.

    Returns a `line` expression that splits input into literal strings,
    nestable reference blocks, and (non-nestable) export/inventory blocks,
    honouring the single- and double-escape conventions.
    """
    # A doubled escape directly before any sentinel collapses to one escape.
    double_escape = pp.Combine(
        pp.Literal(_DOUBLE_ESCAPE) +
        pp.MatchFirst([pp.FollowedBy(_REF_OPEN),
                       pp.FollowedBy(_REF_CLOSE),
                       pp.FollowedBy(_INV_OPEN),
                       pp.FollowedBy(_INV_CLOSE)])).setParseAction(
                           pp.replaceWith(_ESCAPE))

    # --- references (nestable) ---
    ref_open = pp.Literal(_REF_OPEN).suppress()
    ref_close = pp.Literal(_REF_CLOSE).suppress()
    ref_not_open = ~pp.Literal(_REF_OPEN) + ~pp.Literal(
        _REF_ESCAPE_OPEN) + ~pp.Literal(_REF_DOUBLE_ESCAPE_OPEN)
    ref_not_close = ~pp.Literal(_REF_CLOSE) + ~pp.Literal(
        _REF_ESCAPE_CLOSE) + ~pp.Literal(_REF_DOUBLE_ESCAPE_CLOSE)
    # Escaped sentinels become the literal sentinel characters.
    ref_escape_open = pp.Literal(_REF_ESCAPE_OPEN).setParseAction(
        pp.replaceWith(_REF_OPEN))
    ref_escape_close = pp.Literal(_REF_ESCAPE_CLOSE).setParseAction(
        pp.replaceWith(_REF_CLOSE))
    ref_text = pp.CharsNotIn(_REF_EXCLUDES) | pp.CharsNotIn(
        _REF_CLOSE_FIRST, exact=1)
    ref_content = pp.Combine(
        pp.OneOrMore(ref_not_open + ref_not_close + ref_text))
    ref_string = pp.MatchFirst(
        [double_escape, ref_escape_open, ref_escape_close,
         ref_content]).setParseAction(_string)
    ref_item = pp.Forward()
    ref_items = pp.OneOrMore(ref_item)
    reference = (ref_open + pp.Group(ref_items) +
                 ref_close).setParseAction(_reference)
    # A reference may itself contain nested references.
    ref_item << (reference | ref_string)

    # --- exports / inventory queries (not nestable) ---
    inv_open = pp.Literal(_INV_OPEN).suppress()
    inv_close = pp.Literal(_INV_CLOSE).suppress()
    inv_not_open = ~pp.Literal(_INV_OPEN) + ~pp.Literal(
        _INV_ESCAPE_OPEN) + ~pp.Literal(_INV_DOUBLE_ESCAPE_OPEN)
    inv_not_close = ~pp.Literal(_INV_CLOSE) + ~pp.Literal(
        _INV_ESCAPE_CLOSE) + ~pp.Literal(_INV_DOUBLE_ESCAPE_CLOSE)
    inv_escape_open = pp.Literal(_INV_ESCAPE_OPEN).setParseAction(
        pp.replaceWith(_INV_OPEN))
    inv_escape_close = pp.Literal(_INV_ESCAPE_CLOSE).setParseAction(
        pp.replaceWith(_INV_CLOSE))
    inv_text = pp.CharsNotIn(_INV_CLOSE_FIRST)
    inv_content = pp.Combine(pp.OneOrMore(inv_not_close + inv_text))
    inv_string = pp.MatchFirst(
        [double_escape, inv_escape_open, inv_escape_close,
         inv_content]).setParseAction(_string)
    inv_items = pp.OneOrMore(inv_string)
    export = (inv_open + pp.Group(inv_items) +
              inv_close).setParseAction(_invquery)

    # --- top level: text interleaved with references and exports ---
    text = pp.CharsNotIn(_EXCLUDES) | pp.CharsNotIn('', exact=1)
    content = pp.Combine(pp.OneOrMore(ref_not_open + inv_not_open + text))
    string = pp.MatchFirst(
        [double_escape, ref_escape_open, inv_escape_open,
         content]).setParseAction(_string)
    item = reference | export | string
    line = pp.OneOrMore(item) + pp.StringEnd()
    return line
def create_py_parsing_grammar(self):
    """Return a grammar that normalizes newlines inside quoted strings."""
    # Keep around all whitespace.
    pp.ParserElement.setDefaultWhitespaceChars("")

    def add_element(name: str, value: pp.ParserElement):
        # Attach a name and debug tracing to each element when requested.
        nonlocal self
        if self.debug:
            value.setName(name)
            value.setDebug()
        return value

    # Our grammar is pretty simple. We want to remove all newlines
    # inside quoted strings, to make the quoted strings JSON
    # compliant. So our grammar should skip to the first quote while
    # keeping everything before it as-is, process the quoted string
    # skip to the next quote, and repeat that until the end of the
    # file.
    EOF = add_element("EOF", pp.StringEnd())
    SkipToQuote = add_element("SkipToQuote", pp.SkipTo('"'))
    SkipToEOF = add_element("SkipToEOF", pp.SkipTo(EOF))

    def remove_newlines_and_whitespace_in_quoted_string(tokens):
        first_string = tokens[0]
        # Collapse each newline plus following indentation into one space.
        replaced_string = re.sub(r"\n[ ]*", " ", first_string)
        return replaced_string

    QuotedString = add_element(
        "QuotedString",
        pp.QuotedString(quoteChar='"', multiline=True, unquoteResults=False))
    QuotedString.setParseAction(remove_newlines_and_whitespace_in_quoted_string)
    QuotedTerm = add_element("QuotedTerm",
                             pp.Optional(SkipToQuote) + QuotedString)
    Grammar = add_element("Grammar", pp.OneOrMore(QuotedTerm) + SkipToEOF)
    return Grammar
def build_element(self):
    """
    A helper function to assemble the command parser's top level element.
    """
    command_elements = []
    for command in self.commands.values():
        command_elements.append(command.build_element())

    # Enforce command at string start
    element = pp.StringStart()

    # Attempt to match command name only first using lookahead
    commands_element = pp.MatchFirst(pp.Keyword(c) for c in self.commands)
    # `-=` appends with pyparsing's error-stop And: once matching proceeds
    # past this point, failures are reported here instead of backtracking.
    element -= pp.FollowedBy(commands_element)

    # If the above lookahead element matches, the parser will then look for
    # one of the full command string. Otherwise, it will stop immediately.
    element -= pp.MatchFirst(command_elements).setParseAction(self.on_match)

    # Enforce no extra arguments.
    element -= pp.StringEnd()

    return element
def parse_heading_block(heading_block):
    """Split a court-listing heading block into its labelled fields."""
    flattened = " ".join(heading_block.asList())
    # Each field's text runs up to the next label; the label itself is
    # consumed and suppressed. Order mirrors the document layout.
    labelled_fields = [
        ("court", "LJA:"),
        ("LJA", "CMU:"),
        ("CMU", "Session:"),
        ("Session", "Panel:"),
        ("Panel", "Courtroom:"),
        ("Courtroom", "Block:"),
    ]
    elements = [p.Suppress(p.SkipTo(date)), date("date")]
    for field_name, label in labelled_fields:
        marker = p.Literal(label)
        elements.append(p.SkipTo(marker).setResultsName(field_name))
        elements.append(p.Suppress(marker))
    elements.append(p.SkipTo(p.StringEnd()).setResultsName("Block"))

    parsed = p.And(elements).parseString(flattened).asDict()
    return {key: value.strip() for key, value in parsed.items()}
import pyparsing as pp
from funcy import first
from dpath import (select_children, select_all_children,
                   select_all_descendants, compose_selectors, select_text,
                   make_filter)

# Functions callable inside a path, e.g. "text()".
ctx = {"text": select_text}

# Path grammar: optional leading/trailing '/', segments separated by '/'.
start, stop = pp.StringStart(), pp.StringEnd()
sep = pp.Literal("/").suppress()
osep = pp.Optional(sep)
descendants = pp.Literal("**")
children = pp.Literal("*")
element = pp.Word(pp.alphanums + "-_")
func = pp.Word(pp.alphas, pp.alphanums + "-_") + "()"
condition = pp.Forward()

# condition and path are mutually recursive
segment = (descendants | children | func | element) + condition
path = osep + segment + pp.ZeroOrMore(sep + segment) + osep
# A condition is an optional "[...]" filter containing a nested path.
condition << pp.Optional(
    pp.Literal("[").suppress() + path + pp.Literal("]").suppress())
parser = (start + path + stop)


@condition.setParseAction
def condition_action(txt, loc, toks):