def transform(txt):
    """Return *txt* as a JSON string, or as a parsed dict/list-like structure.

    Text surrounding the outermost ``[...]`` or ``{...}`` is discarded; the
    bracket kind that opens first in *txt* decides which closing bracket is
    searched for.  If the remaining text is valid JSON, that (trimmed)
    string is returned.  Otherwise it is parsed with a relaxed
    object-literal grammar (unquoted keys, any quoting style accepted by
    ``quotedString``, optional commas between dict entries) and the parsed
    pyparsing result is printed and returned instead.

    Raises whatever ``parseString(..., parseAll=True)`` raises when the text
    is neither valid JSON nor accepted by the relaxed grammar.
    """
    # str.find() returns -1 for "not found"; keep only real positions so a
    # missing bracket kind can never win the "which comes first" comparison.
    openers = [
        (pos, closer)
        for pos, closer in ((txt.find('['), ']'), (txt.find('{'), '}'))
        if pos >= 0
    ]
    if openers:
        start, closer = min(openers)
        end = txt.rfind(closer)
        # Only trim when a matching closer actually follows the opener;
        # otherwise leave the text alone and let the parsers report it.
        if end > start:
            txt = txt[start:end + 1]

    try:
        json.loads(txt)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        # parse dict-like syntax
        LBRACK, RBRACK, LBRACE, RBRACE, COLON, COMMA = map(Suppress, "[]{}:,")
        integer = Regex(r"[+-]?\d+").setParseAction(lambda t: int(t[0]))
        real = Regex(r"[+-]?\d+\.\d*").setParseAction(lambda t: float(t[0]))
        string_ = (Word(alphas, alphanums + "_")
                   | quotedString.setParseAction(removeQuotes))
        bool_ = oneOf("true false").setParseAction(lambda t: t[0] == "true")
        item = Forward()
        key = string_
        dict_ = (LBRACE
                 - Optional(dictOf(key + COLON, item + Optional(COMMA)))
                 + RBRACE)
        list_ = LBRACK - Optional(delimitedList(item)) + RBRACK
        # NOTE(review): string_ is tried before bool_ in this alternation, so
        # a bare true/false appears to come back as a string -- confirm
        # whether that is intended before reordering.
        item << (real | integer | string_ | bool_ | Group(list_ | dict_))
        result = item.parseString(txt, parseAll=True)[0]
        # Kept from the original: echoes the parsed structure to stdout.
        print(result)
        txt = result
    return txt
def equals_pairs_dict(string):
    """Parse ``KEY=value`` lines into a dict keyed by the lower-cased names.

    A key is a run of letters/underscores followed by ``=``; its value is
    the rest of that line.

    Raises:
        VersionFileParseException: if pyparsing rejects *string*.
    """
    pair_key = Word(alphas + "_") + Literal("=").suppress()
    grammar = dictOf(pair_key, restOfLine)
    try:
        parsed = grammar.parseString(string).asDict()
        return {name.lower(): val for name, val in parsed.items()}
    except ParseException:
        raise VersionFileParseException(string)
def _parse_phoenixline():
    """Build the parser for a single line containing a variable definition.

    The expression matches, in order: the literal ``variable:`` (dropped),
    a ``name``, a ``type``, an ``iotype`` of ``input`` or ``output``, and a
    dictionary of ``key=value`` attributes.
    """
    marker = Suppress("variable:")
    var_name = Word(alphanums).setResultsName("name")
    var_type = Word(printables).setResultsName("type")
    direction = oneOf(""" input output """).setResultsName("iotype")

    # An attribute value is either one simple word or a comma-separated list.
    attr_value = Word(alphanums+","+'-'+'.') | commaSeparatedList
    attributes = dictOf(Word(alphanums), Suppress("=") + attr_value)

    return marker + var_name + var_type + direction + attributes
def colon_pairs_dict(string):
    """Parse ``Key: value`` lines into a dict keyed by the lower-cased names.

    A key is a run of letters/underscores followed by ``": "``; its value is
    the rest of that line.

    Raises:
        VersionFileParseException: if pyparsing rejects *string*.
    """
    pair_key = Word(alphas + "_") + Literal(": ").suppress()
    grammar = dictOf(pair_key, restOfLine)
    try:
        parsed = grammar.parseString(string).asDict()
        return {name.lower(): val for name, val in parsed.items()}
    except ParseException:
        raise VersionFileParseException(string)
def colon_pairs_list(string):
    """Parse blank-line-separated groups of ``Key: value`` lines.

    *string* is stripped and split on empty lines; each chunk becomes one
    dict with lower-cased keys, and the dicts are returned in input order.

    Raises:
        OutputParseException: if pyparsing rejects any chunk.
    """
    pair_key = Word(alphas + "_") + Literal(": ").suppress()
    grammar = dictOf(pair_key, restOfLine)
    chunks = string.strip().split('\n\n')
    try:
        return [
            {name.lower(): val
             for name, val in grammar.parseString(chunk).asDict().items()}
            for chunk in chunks
        ]
    except ParseException:
        raise OutputParseException(string)
def make_value_section_parser(type_results):
    """Create a parser that parses the value section.

    The VALUE section format depends on the types declared in the TYPE
    section, so the parser is assembled at run time from the parsed TYPE
    results.

    Parameters
    ----------
    type_results : pyparsing.ParseResults
        the parsed results from the type section.
    """
    # One ClassDef per parsed TYPE entry; each carries its own value parser.
    class_defs = [ClassDef(entry) for entry in type_results]

    # A value must match one of the declared classes, so OR the parsers
    # together in declaration order.
    first_def, other_defs = class_defs[0], class_defs[1:]
    any_value = first_def.parser
    for class_def in other_defs:
        any_value |= class_def.parser

    # Optional trailing PROP(...) section.
    # NOTE: a parse action is used so the dictionary structure stays intact.
    def props_as_dict(toks):
        return toks.asDict()

    prop_parser = (
        pp.Literal('PROP').suppress()
        + pp.Literal('(').suppress()
        + prop_dict
        + pp.Literal(')')
    ).setParseAction(props_as_dict)

    # NOTE: value and properties are grouped separately so the properties
    # field cannot merge into the value field.
    grouped_value = pp.Group(any_value)('value')
    grouped_props = pp.Group(pp.Optional(prop_parser, default={}))('properties')
    entry_parser = grouped_value + grouped_props

    # Unwrap both groups so asDict() converts correctly.
    def format_value(toks):
        return {'value': toks[0][0], 'properties': toks[1][0]}

    entry_parser.setParseAction(format_value)

    # VALUE section: variable names, each followed by its value/properties,
    # wrapped in VALUE ... END tags.
    val_dict = pp.dictOf(quoted_string, entry_parser)
    return (pp.Literal('VALUE').suppress()
            + val_dict
            + pp.Literal('END').suppress())
def load_js_obj_literal(j):
    """Terrible hack.

    Parses a JavaScript-style object literal embedded in *j*: everything
    before the first ``{`` is dropped, newlines, tabs and semicolons are
    removed, ``//`` comments running up to a ``{`` are stripped, and the
    rest is parsed with a relaxed dict/list grammar.  Returns the first
    parsed item.
    """
    # str.index raises ValueError when there is no '{' at all.
    j = j[j.index('{'):]
    for junk in ('\n', '\t', ';'):
        j = j.replace(junk, '')
    j = re.sub(r'//.*?{', r'{', j)

    lbrack, rbrack, lbrace, rbrace, colon, comma = map(Suppress, "[]{}:,")
    int_value = Regex(r"[+-]?\d+").setParseAction(lambda t: int(t[0]))
    float_value = Regex(r"[+-]?\d+\.\d*").setParseAction(lambda t: float(t[0]))
    text_value = (Word(alphas, alphanums + "_")
                  | quotedString.setParseAction(removeQuotes))
    bool_value = oneOf("true false").setParseAction(lambda t: t[0] == "true")

    element = Forward()
    mapping = (lbrace
               - Optional(dictOf(text_value + colon, element + Optional(comma)))
               + rbrace)
    sequence = lbrack - Optional(delimitedList(element)) + rbrack
    # NOTE(review): text_value is tried before bool_value in this
    # alternation, so a bare true/false appears to come back as a string.
    element << (float_value | int_value | text_value | bool_value
                | Group(sequence | mapping))

    return element.parseString(j, parseAll=True)[0]
def makeCsvTokenParser():
    """Build the CSV token grammar and store it in the module-level ``result``.

    A token is a number, an identifier, or a parenthesised list/dict of
    tokens.  List items are separated by ``|``; dict entries are
    ``key: value`` pairs optionally followed by ``|``.  Nested lists and
    dicts are passed through ``simplify(..., True)``.  Nothing is returned.
    """
    global result

    int_tok = pp.Combine(pp.Optional('-') + pp.Word(pp.nums))
    int_tok.setParseAction(lambda toks: int(toks[0]))

    float_tok = pp.Combine(
        pp.Optional('-') + pp.Word(pp.nums) + '.' + pp.Word(pp.nums))
    float_tok.setParseAction(lambda toks: float(toks[0]))

    ident = pp.Word(pp.alphanums + " -_.?'>+,!")
    scalar = int_tok ^ float_tok ^ ident

    # Lists and dicts are recursive, so declare them before defining tokens.
    list_body = pp.Forward()
    dict_body = pp.Forward()
    open_paren = pp.Suppress('(')
    close_paren = pp.Suppress(')')
    paren_list = pp.Group(open_paren + list_body + close_paren)
    paren_dict = pp.Group(open_paren + dict_body + close_paren)
    any_token = scalar ^ paren_list ^ paren_dict

    # A one-element list is written with a trailing '|'.
    single_item = any_token + pp.Suppress('|')
    many_items = pp.delimitedList(any_token, delim='|')
    list_body << pp.Group(single_item ^ many_items)

    def simplify_list(toks):
        return [[simplify(part, True) for part in toks[0]]]

    list_body.setParseAction(simplify_list)

    dict_key = ident + (pp.Suppress(':') ^ pp.StringEnd())
    dict_val = any_token + pp.Suppress(pp.Optional('|'))
    dict_body << pp.Group(pp.dictOf(dict_key, dict_val))

    def simplify_dict(toks):
        return {name: simplify(val, True) for name, val in toks[0]}

    dict_body.setParseAction(simplify_dict)

    result = scalar ^ list_body ^ dict_body
def __init__(self):
    """Set up the codec, the comment/blank-line strippers and the file grammar."""
    # codec used for encoding of usermessages
    self.codec = lightpile.codec

    line_end = pp.LineEnd()
    line_start = pp.LineStart().leaveWhitespace()

    # Pre-processing expressions: a '#' comment is replaced by a bare
    # newline, a blank line by the empty string.
    comment_expr = '#' + pp.restOfLine + line_end
    blank_expr = line_start + line_end
    self.comment_stripper = comment_expr.setParseAction(pp.replaceWith("\n"))
    self.blankline_stripper = blank_expr.setParseAction(pp.replaceWith(""))

    # filegrammar
    inline_ws = ' \t'
    non_comment_chars = pp.printables.replace('#', '')
    text = pp.OneOrMore(
        pp.White(inline_ws) | pp.quotedString | pp.Word(non_comment_chars))
    text.setParseAction(lambda tokens: ''.join(tokens))

    open_bracket = pp.Literal("[").suppress()
    close_bracket = pp.Literal("]").suppress()

    # identifiers have to start with a unicode-letter and can continue
    # with any number of unicode-letters or any of
    # ':', '%', '+', '-', '_', '.', ','
    self.identifier = pp.Regex(r'[^\W\d]([^\W]|[%:_,\+\-\.])*', re.U)

    section_head = (open_bracket + self.identifier + close_bracket
                    + line_end.suppress())
    # A section body line is any line that is neither the next section
    # head nor the end of the input.
    section_line = (~section_head + ~pp.StringEnd()
                    + pp.restOfLine + line_end)
    section_body = pp.ZeroOrMore(section_line)
    section_body.setParseAction(lambda tokens: ''.join(tokens))

    self.filegrammar = pp.dictOf(section_head, section_body)
    self._init_sectiongrammars()
from pkgcore.config import dhcpformat, errors from snakeoil import mappings import pyparsing as pyp _section_contents = pyp.Forward() _value = (pyp.Word(pyp.alphanums + './_').setWhitespaceChars(' \t') | pyp.quotedString.copy().setParseAction(pyp.removeQuotes)) _section = pyp.Group( pyp.Suppress('{' + pyp.lineEnd) + _section_contents + pyp.Suppress('}')) # "statement seems to have no effect" # pylint: disable-msg=W0104 _section_contents << pyp.dictOf( _value + pyp.Suppress('='), pyp.Group(pyp.OneOrMore((_value | _section).setWhitespaceChars(' \t'))) + pyp.Suppress(pyp.lineEnd)) parser = ( pyp.stringStart + pyp.dictOf( pyp.Suppress('[') + _value + pyp.Suppress(']' + pyp.lineEnd), _section_contents).ignore(pyp.pythonStyleComment) + pyp.stringEnd) def config_from_file(file_obj): try: config = parser.parseFile(file_obj) except pyp.ParseException as e: name = getattr(file_obj, 'name', file_obj)
def _getGerberGrammar(self):
    """
    Returns the grammar of Gerber

    The returned expression is ``OneOrMore(Group(instruction))`` where an
    instruction is one of: comment (``G04``, suppressed), units (``%MO..*%``),
    format (``%FS..*%``), aperture definition (``%AD..*%``), aperture change
    (``Dnn*``), draw / move / flash (``X..Y..D01|D02|D03*``), polarity
    (``%LP..*%``), closed-shape start / end (``G36`` / ``G37``) and end of
    file (``M02``).  The circular-interpolation codes ``G02`` / ``G03`` are
    not part of the grammar.
    """
    # Define grammar using pyparsing
    space = pyp.Literal(' ')
    comma = pyp.Literal(',').suppress()

    # Capture an unsigned decimal string and cast to float.  The pattern
    # allows at most one decimal point, so malformed input such as "1.2.3"
    # fails as a normal parse error instead of raising ValueError from
    # float() inside the parse action.
    floatnum = pyp.Regex(r'(\d+\.?\d*|\.\d+)').setParseAction(
        lambda t: float(t[0]))

    # Capture integer string and cast to int
    integer = pyp.Regex(r'(-?\d+)').setParseAction(lambda t: int(t[0]))

    # Capture single digit string and cast to int
    single_digit = pyp.Regex(r'(\d)').setParseAction(lambda t: int(t[0]))

    inst_del = pyp.Literal('%').suppress()  # instruction delimiter
    inst_end = pyp.Literal('*').suppress()  # ending suffix

    cmd_comment = pyp.Literal('G04').setParseAction(
        pyp.replaceWith('comment'))

    cmd_closed_shape_start = pyp.Literal('G36')
    cmd_closed_shape_end = pyp.Literal('G37')

    cmd_units = pyp.Literal('MO')('gerber-command')
    cmd_units_opt_mm = pyp.Literal('MM').setParseAction(
        pyp.replaceWith('mm'))
    cmd_units_opt_inch = pyp.Literal('IN').setParseAction(
        pyp.replaceWith('inch'))

    cmd_format = pyp.Literal('FS')('gerber-command')
    cmd_format_opt_leading_zeros = pyp.Literal('L').setParseAction(
        pyp.replaceWith('leading'))
    cmd_format_opt_trailing_zeros = pyp.Literal('T').setParseAction(
        pyp.replaceWith('trailing'))
    cmd_format_opt_absolute = pyp.Literal('A').setParseAction(
        pyp.replaceWith('absolute'))
    cmd_format_opt_incremental = pyp.Literal('I').setParseAction(
        pyp.replaceWith('incremental'))

    # Aperture definition
    cmd_ap_def = pyp.Literal('AD')('gerber-command')
    cmd_ap_def_num = 'D' + integer.setResultsName('number')
    cmd_ap_def_opt_circ = pyp.Literal('C').setParseAction(
        pyp.replaceWith('circle'))
    cmd_ap_def_opt_rect = pyp.Literal('R').setParseAction(
        pyp.replaceWith('rect'))

    cmd_polarity = pyp.Literal('LP')('gerber-command')
    cmd_polarity_opt_dark = pyp.Literal('D').setParseAction(
        pyp.replaceWith('dark'))
    cmd_polarity_opt_clear = pyp.Literal('C').setParseAction(
        pyp.replaceWith('clear'))

    cmd_linear_int = pyp.Literal('G01').suppress()  # linear interpolation

    # NOTE(review): the circle branch requires a trailing 'X' after the
    # diameter -- confirm this matches the files being parsed.
    aperture_type = (
        ((cmd_ap_def_opt_circ('type') + comma)
         + (floatnum)('diameter') + 'X')
        | ((cmd_ap_def_opt_rect('type') + comma)
           + (floatnum)('width') + 'X' + (floatnum)('height')))

    polarity_type = (cmd_polarity_opt_clear
                     | cmd_polarity_opt_dark)('polarity')

    units_type = (cmd_units_opt_mm | cmd_units_opt_inch)('units')

    format_zeros = ((cmd_format_opt_leading_zeros('zeros'))
                    | (cmd_format_opt_trailing_zeros('zeros')))

    format_notation = ((cmd_format_opt_absolute('notation'))
                       | (cmd_format_opt_incremental('notation')))

    format_data = (single_digit)('integer') + single_digit('decimal')

    # comments (suppress)
    comment = (cmd_comment
               + pyp.Optional(space)
               + pyp.Regex(r"([^\*]+)?")
               + pyp.Optional(space)
               + inst_end).suppress()

    units = (inst_del
             + pyp.Group(cmd_units + units_type)('units')
             + inst_end
             + inst_del)

    gformat = (inst_del
               + pyp.Group(cmd_format
                           + format_zeros
                           + format_notation
                           + 'X' + pyp.Group(format_data)('x')
                           + 'Y' + pyp.Group(format_data)('y'))('format')
               + inst_end
               + inst_del)

    ap_def = (inst_del
              + pyp.Group(cmd_ap_def
                          + cmd_ap_def_num
                          + aperture_type)('aperture_definition')
              + inst_end
              + inst_del)

    polarity = (inst_del
                + pyp.Group(cmd_polarity
                            + polarity_type)('polarity_change')
                + inst_end
                + inst_del)

    closed_shape_start = (cmd_closed_shape_start('start_closed_shape')
                          + inst_end)
    closed_shape_end = (cmd_closed_shape_end('end_closed_shape')
                        + inst_end)

    def coord_op(d_code, name):
        # Optional G01 prefix, integer X/Y coordinates, then the D-code
        # that selects the operation; the whole group is named `name`.
        return pyp.Group(pyp.Optional(cmd_linear_int)
                         + 'X' + (integer)('x')
                         + 'Y' + (integer)('y')
                         + pyp.Literal(d_code).suppress()
                         + inst_end)(name)

    draw = coord_op('D01', 'draw')
    move = coord_op('D02', 'move')
    flash = coord_op('D03', 'flash')

    aperture_change = (pyp.Literal('D').suppress()
                       + pyp.Group(integer('number')
                                   + inst_end)('aperture_change'))

    # end of file (suppress)
    the_end = (pyp.Literal('M02') + inst_end)('end_of_gerber')

    grammar = (comment
               | units
               | gformat
               | ap_def
               | aperture_change
               | draw
               | move
               | flash
               | polarity
               | closed_shape_start
               | closed_shape_end
               | the_end)

    return pyp.OneOrMore(pyp.Group(grammar))
def _attlist():
    """Return a parser for ``name="value"`` attribute pairs.

    Keys are alphabetic words; each value is a quoted string following an
    ``=`` sign (the ``=`` itself is dropped).  The pairs are exposed through
    pyparsing's ``dictOf`` so they can be looked up by key.
    """
    # Returned directly rather than bound to a local called ``dict``, which
    # shadowed the builtin.
    return dictOf(Word(alphas), Suppress("=") + quotedString)
import pyparsing as pyp from snakeoil import mappings from pkgcore.config import dhcpformat, errors _section_contents = pyp.Forward() _value = (pyp.Word(pyp.alphanums + './_').setWhitespaceChars(' \t') | pyp.quotedString.copy().setParseAction(pyp.removeQuotes)) _section = pyp.Group( pyp.Suppress('{' + pyp.lineEnd) + _section_contents + pyp.Suppress('}')) # "statement seems to have no effect" # pylint: disable-msg=W0104 _section_contents << pyp.dictOf( _value + pyp.Suppress('='), pyp.Group(pyp.OneOrMore((_value | _section).setWhitespaceChars(' \t'))) + pyp.Suppress(pyp.lineEnd)) parser = (pyp.stringStart + pyp.dictOf( pyp.Suppress('[') + _value + pyp.Suppress(']' + pyp.lineEnd), _section_contents).ignore(pyp.pythonStyleComment) + pyp.stringEnd) def config_from_file(file_obj): try: config = parser.parseFile(file_obj) except pyp.ParseException as e: name = getattr(file_obj, 'name', file_obj) raise errors.ConfigurationError(f'{name}: {e}') from e def build_section(name):
import pyparsing as pp

# "key: value" lines: an alphanumeric key at the start of a line, a colon,
# then the remainder of the line as the value.
key = pp.LineStart().suppress() + pp.Word(pp.alphanums) + pp.Suppress(':')
value = pp.restOfLine + pp.LineEnd().suppress()
kvParser = pp.dictOf(key, value)

###
# "[session name]" header on a line of its own.
session_name = pp.OneOrMore(pp.Word(pp.alphanums))
session = (
    pp.LineStart().suppress()
    + pp.Suppress('[')
    + session_name
    + pp.Suppress(']')
    + pp.LineEnd().suppress()
)
zeroOrMoreEmptyLines = pp.ZeroOrMore(pp.LineEnd().suppress())

# A content line is everything up to the end of the line, unless the line
# is itself a session header.
line = (
    pp.LineStart()
    + pp.SkipTo(pp.LineEnd(), failOn=session)
    + pp.LineEnd().suppress()
)
lines = pp.Group(pp.ZeroOrMore(line.leaveWhitespace()))
contentParser = pp.dictOf(session, lines)

# Whole document: [Metadata] key/values, [Tags] key/values, then sessions.
Parser = (
    pp.Keyword('[Metadata]').suppress()
    + pp.LineEnd().suppress()
    + kvParser.setResultsName('Metadata')
    + pp.Keyword('[Tags]').suppress()
    + pp.LineEnd().suppress()
    + kvParser.setResultsName('Tags')
    + pp.Group(contentParser).setResultsName('Content')
)


def parse(fc):
    """Parse the document text *fc* with ``Parser`` and return the results."""
    return Parser.parseString(fc)
definition = definition + contents('value') if post is not None: definition = definition + post return definition + RCURLY def make_param_block(tag_type, contents): return make_named_block(tag_type, pre=attrs, contents=contents) param_bool = make_param_block('parambool', Optional(bool_)) param_long = make_param_block('paramlong', Optional(int_num)) param_string = make_param_block('paramstring', Optional(quoted_multi)) # Recursive definition of blocks, can include param_map param_block = Forward() array_default = dictOf(make_literal_tag('default'), param_block) param_array = make_named_block('paramarray', pre=attrs + array_default, contents=_spa(Optional(list_value), lambda s, l, t: [t[0]])) # Not sure what value type should be for paramchoice param_choice = make_param_block('paramchoice', Optional(quoted_multi)) param_map = make_param_block('parammap', ZeroOrMore(Group(param_block))) # Fancy functor and service stuff def make_args_block(tag_type): return (make_named_tag(tag_type) + LCURLY + list_entries + RCURLY)
__all__ = ("config_from_file",) from snakeoil import mappings, modules import pyparsing as pyp from pkgcore.config import basics, errors # this is based on the 'BIND named.conf parser' on pyparsing's webpage _section = pyp.Forward() _value = (pyp.Word(pyp.alphanums + './_') | pyp.quotedString.copy().setParseAction(pyp.removeQuotes)) _section_contents = pyp.dictOf( _value, pyp.Group(pyp.OneOrMore(_value | _section)) + pyp.Suppress(';')) # "statement seems to have no effect" # pylint: disable-msg=W0104 _section << pyp.Group(pyp.Suppress('{') + _section_contents + pyp.Suppress('}')) parser = ( pyp.stringStart + pyp.dictOf(_value, _section).ignore(pyp.pythonStyleComment) + pyp.stringEnd) class ConfigSection(basics.ConfigSection): """Expose a section_contents from pyparsing as a ConfigSection.
notted_predicate = Literal('(').suppress() + Keyword('not') + predicate + Literal(')').suppress() # a set of ground atoms/predicates is a state, they are all presumed to be ANDed together (conjunction) state_conjunction_implicit = OneOrMore(ground_predicate) state_conjunction_explicit = (Literal('(') + Keyword('and')).suppress() + state_conjunction_implicit + Literal(')').suppress() state = state_conjunction_explicit | state_conjunction_implicit function_arguments = Literal('(').suppress() + Group(OneOrMore(variable)) + Literal(')').suppress() expr = Literal('(').suppress() + Group(identifier + OneOrMore(variable)) + Literal(')').suppress() notted_expr = Literal('(').suppress() + Keyword('not') + expr + Literal(')').suppress() expr_set = Literal('(').suppress() + OneOrMore(expr) + Literal(')').suppress() init = Literal(':').suppress() + Keyword('init') # (:requirements :strips) goal = Literal(':').suppress() + Keyword('goal') # (:requirements :typing) init_goal_states = dictOf(Literal('(').suppress() + (init | goal), state + Literal(')').suppress() + comment ) s = r'''(:init (S B B) (S C B) (S A C) (R B B) (R C B)) (:goal (and (S A A)))''' print('Input strips string:') print(s) parsed_states = init_goal_states.parseString(s) print('parsed init state:') print(parsed_states.asDict()) problem_name = (Literal('(') + Keyword('problem')).suppress() + identifier + Literal(')').suppress() problem_domain = (Literal('(') + Keyword(':domain')).suppress() + identifier + Literal(')').suppress() problem = comment + ( comment + (Literal('(') + Keyword('define')).suppress() + problem_name + problem_domain
# hash reaction entry hashed_reaction_section = Group(hashsymbol + Suppress(define_reactions_) + lbrace + OneOrMore(reaction_definition) + rbrace) # hash observable entry count_definition = Group(count_ + lbracket + species_definition.setResultsName('speciesPattern') + Suppress(',') + identifier + rbracket) observable_entry = Group(lbrace + Group(delimitedList(count_definition, delim='+')).setResultsName('patterns') + rbrace + Suppress('=>') + quotedString.setResultsName('outputfile')) bngobservable_entry = Group((species_ | molecules_).setResultsName('obskey') + identifier.setResultsName('obsname') + Group(delimitedList(species_definition, delim=',')).setResultsName('obspatterns')) hashed_observable_section = Group(hashsymbol + Suppress(reaction_data_output_) + lbrace + OneOrMore(statement).setResultsName('statements') + ZeroOrMore(observable_entry).setResultsName('mdlobs') + ZeroOrMore(bngobservable_entry).setResultsName('bngobs') + rbrace) # hash initialization entry key = identifier + Suppress('=') value = restOfLine release_site_definition = Group(identifier.setResultsName('name') + release_site_ + lbrace + dictOf(key, value).setResultsName('entries') + rbrace) object_definition = Group(identifier.setResultsName('compartmentName') + Suppress(object_) + (bracketidentifier | identifier) + (nestedExpr('{', '}', content=statement)).setResultsName('compartmentOptions')) hashed_initialization_section = Group(hashsymbol + Suppress(instantiate_) + identifier.setResultsName('name') + identifier.setResultsName('type') + lbrace + Group(ZeroOrMore(release_site_definition | object_definition)).setResultsName('entries') + rbrace) other_sections = section_enclosure_ # statement = Group(identifier + equal + (quotedString | OneOrMore(mathElements))) + Suppress(LineEnd() | StringEnd()) grammar = ZeroOrMore(Suppress(other_sections) | Suppress(statement) | hashed_system_constants.setResultsName('systemConstants') | hashed_molecule_section.setResultsName('molecules') | 
hashed_reaction_section.setResultsName('reactions') | hashed_observable_section.setResultsName('observables') | hashed_initialization_section.setResultsName('initialization') | hashed_function_section.setResultsName('math_functions') # | Suppress(hashed_section) ) nonhashedgrammar = ZeroOrMore(Suppress(statement) | Suppress(hashed_section) | Dict(other_sections))
bngobservable_entry = Group((species_ | molecules_).setResultsName('obskey') + identifier.setResultsName('obsname') + Group(delimitedList(species_definition, delim=',') ).setResultsName('obspatterns')) hashed_observable_section = Group( hashsymbol + Suppress(reaction_data_output_) + lbrace + OneOrMore(statement).setResultsName('statements') + ZeroOrMore(observable_entry).setResultsName('mdlobs') + ZeroOrMore(bngobservable_entry).setResultsName('bngobs') + rbrace) # hash initialization entry key = identifier + Suppress('=') value = restOfLine release_site_definition = Group( identifier.setResultsName('name') + release_site_ + lbrace + dictOf(key, value).setResultsName('entries') + rbrace) object_definition = Group( identifier.setResultsName('compartmentName') + Suppress(object_) + (bracketidentifier | identifier) + (nestedExpr('{', '}', content=statement) ).setResultsName('compartmentOptions')) hashed_initialization_section = Group( hashsymbol + Suppress(instantiate_) + identifier.setResultsName('name') + identifier.setResultsName('type') + lbrace + Group(ZeroOrMore(release_site_definition | object_definition)).setResultsName('entries') + rbrace) other_sections = section_enclosure_ #statement = Group(identifier + equal + (quotedString | OneOrMore(mathElements))) + Suppress(LineEnd() | StringEnd()) grammar = ZeroOrMore( Suppress(other_sections) | Suppress(statement) | hashed_system_constants.setResultsName('systemConstants')
def get_parser(self):
    """Return the arithmetic-expression grammar, building it on first use.

    Every call first invokes ``self.clear_stack()``.  The grammar is built
    only while ``self._parser`` is falsy and is then stored on the instance,
    so later calls return the same object.  Its parse actions call
    ``self.push_first`` and ``self.push_unary_minus`` as tokens match.
    """
    self.clear_stack()
    if not self._parser:
        # use CaselessKeyword for e and pi, to avoid accidentally matching
        # functions that start with 'e' or 'pi' (such as 'exp'); Keyword
        # and CaselessKeyword only match whole words
        e = CaselessKeyword("E")
        pi = CaselessKeyword("PI")
        # fnumber = Combine(Word("+-"+nums, nums) +
        #                    Optional("." + Optional(Word(nums))) +
        #                    Optional(e + Word("+-"+nums, nums)))
        # or use provided pyparsing_common.number, but convert back to str:
        # fnumber = ppc.number().addParseAction(lambda t: str(t[0]))
        fnumber = Regex(r"[+-]?(?:\d+|\.\d+)(?:\.\d+)?(?:[eE][+-]?\d+)?")
        # An identifier is either a "urn:great_expectations:..." URN or a
        # plain variable name; the URN form is tried first.
        ge_urn = Combine(
            Literal("urn:great_expectations:") + Word(alphas, alphanums + "_$:?=%.&"))
        variable = Word(alphas, alphanums + "_$")
        ident = ge_urn | variable

        plus, minus, mult, div = map(Literal, "+-*/")
        lpar, rpar = map(Suppress, "()")
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")

        expr = Forward()
        expr_list = delimitedList(Group(expr))
        # We will allow functions either to accept *only* keyword
        # expressions or *only* non-keyword expressions
        # define function keyword arguments
        key = Word(alphas + "_") + Suppress("=")
        # value = (fnumber | Word(alphanums))
        value = expr
        # setParseAction returns `key` itself, so the keyword name goes
        # through push_first when it matches.
        keyval = dictOf(key.setParseAction(self.push_first), value)
        kwarglist = delimitedList(keyval)
        # add parse action that replaces the function identifier with a (name, number of args, has_fn_kwargs) tuple
        # 20211009 - JPC - Note that it's important that we consider kwarglist
        # first as part of disabling backtracking for the function's arguments
        # Three shapes: f(), f(positional, ...) and f(key=value, ...).  The
        # '-' after lpar stops backtracking once '(' has matched; '^' picks
        # the longer of the positional and keyword alternatives.
        fn_call = (ident + lpar + rpar).setParseAction(
            lambda t: t.insert(0, (t.pop(0), 0, False))) | (
            (ident + lpar - Group(expr_list) + rpar).setParseAction(
                lambda t: t.insert(0, (t.pop(0), len(t[0]), False)))
            ^ (ident + lpar - Group(kwarglist) + rpar).setParseAction(
                lambda t: t.insert(0, (t.pop(0), len(t[0]), True))))
        # An atom is any number of leading +/- signs followed by a function
        # call, constant, number, identifier or parenthesised expression.
        atom = (addop[...]
                + ((fn_call | pi | e | fnumber | ident).setParseAction(self.push_first)
                   | Group(lpar + expr + rpar))).setParseAction(
            self.push_unary_minus)

        # by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left
        # exponents, instead of left-to-right that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor <<= atom + (expop + factor).setParseAction(
            self.push_first)[...]
        term = factor + (multop + factor).setParseAction(
            self.push_first)[...]
        expr <<= term + (addop + term).setParseAction(self.push_first)[...]
        self._parser = expr
    return self._parser
appname = Word(printables, min=1, max=255) # procname - web.1 procname = Word(printables, min=1, max=255) # msg - State changed from starting to up msg = restOfLine heroku_syslog_message = (pri.setResultsName('pri') + version.setResultsName('version') + SPACE + timestamp.setResultsName('timestamp') + SPACE + hostname.setResultsName('hostname') + SPACE + appname.setResultsName('appname') + SPACE + procname.setResultsName('procname') + SPACE + DASH + SPACE + msg.setResultsName('msg') + lineEnd) # Parse "key=value key=value key="value" key='value'" into a dict attr_label = Word(srange('[a-zA-Z0-9_]')) attr_value = Combine( Suppress('=') + STRIP_QUOTES + Word(printables) + STRIP_QUOTES ) parse_dict = dictOf(attr_label, attr_value) __all__ = ['heroku_syslog_message', 'parse_dict']
_parse_type("str")) string_value = quoted_string | unquoted_string single_value = bool_value | float_value | string_value | int_value list_value = pp.Group( pp.Suppress("[") + pp.Optional(pp.delimitedList(single_value)) + pp.Suppress("]")).setParseAction(_parse_type("list")) # because this is a recursive construct, a dict can contain dicts in values dict_value = pp.Forward() value = list_value ^ single_value ^ dict_value dict_key_value = pp.dictOf(string_value + pp.Suppress(":"), value) dict_value << pp.Group( pp.Suppress("{") + pp.delimitedList(dict_key_value) + pp.Suppress("}")).setParseAction(_parse_type("dict")) # Positionals must be end of line or has a space (or more) afterwards. # This is to ensure that the parser treats text like "something=" as invalid # instead of parsing this as positional "something" and leaving the "=" as # invalid on its own. positionals = pp.ZeroOrMore(value + ( pp.StringEnd() ^ pp.Suppress(pp.OneOrMore(pp.White())))).setResultsName("positionals") key_value = pp.Dict( pp.ZeroOrMore(pp.Group(identifier + pp.Suppress("=") +
def _getGerberGrammar(self):
    """
    Returns the grammar of Gerber

    The returned expression is ``OneOrMore(Group(instruction))`` where an
    instruction is one of: comment (``G04``, suppressed), units (``%MO..*%``),
    format (``%FS..*%``), aperture definition (``%AD..*%``), aperture change
    (``Dnn*``), draw / move / flash (``X..Y..D01|D02|D03*``), polarity
    (``%LP..*%``), closed-shape start / end (``G36`` / ``G37``) and end of
    file (``M02``).  The circular-interpolation codes ``G02`` / ``G03`` are
    not part of the grammar.
    """
    # Define grammar using pyparsing
    space = pyp.Literal(' ')
    comma = pyp.Literal(',').suppress()

    # Capture an unsigned decimal string and cast to float.  The pattern
    # allows at most one decimal point, so malformed input such as "1.2.3"
    # fails as a normal parse error instead of raising ValueError from
    # float() inside the parse action.
    floatnum = pyp.Regex(r'(\d+\.?\d*|\.\d+)').setParseAction(
        lambda t: float(t[0]))

    # Capture integer string and cast to int
    integer = pyp.Regex(r'(-?\d+)').setParseAction(lambda t: int(t[0]))

    # Capture single digit string and cast to int
    single_digit = pyp.Regex(r'(\d)').setParseAction(lambda t: int(t[0]))

    inst_del = pyp.Literal('%').suppress()  # instruction delimiter
    inst_end = pyp.Literal('*').suppress()  # ending suffix

    cmd_comment = pyp.Literal('G04').setParseAction(
        pyp.replaceWith('comment'))

    cmd_closed_shape_start = pyp.Literal('G36')
    cmd_closed_shape_end = pyp.Literal('G37')

    cmd_units = pyp.Literal('MO')('gerber-command')
    cmd_units_opt_mm = pyp.Literal('MM').setParseAction(
        pyp.replaceWith('mm'))
    cmd_units_opt_inch = pyp.Literal('IN').setParseAction(
        pyp.replaceWith('inch'))

    cmd_format = pyp.Literal('FS')('gerber-command')
    cmd_format_opt_leading_zeros = pyp.Literal('L').setParseAction(
        pyp.replaceWith('leading'))
    cmd_format_opt_trailing_zeros = pyp.Literal('T').setParseAction(
        pyp.replaceWith('trailing'))
    cmd_format_opt_absolute = pyp.Literal('A').setParseAction(
        pyp.replaceWith('absolute'))
    cmd_format_opt_incremental = pyp.Literal('I').setParseAction(
        pyp.replaceWith('incremental'))

    # Aperture definition
    cmd_ap_def = pyp.Literal('AD')('gerber-command')
    cmd_ap_def_num = 'D' + integer.setResultsName('number')
    cmd_ap_def_opt_circ = pyp.Literal('C').setParseAction(
        pyp.replaceWith('circle'))
    cmd_ap_def_opt_rect = pyp.Literal('R').setParseAction(
        pyp.replaceWith('rect'))

    cmd_polarity = pyp.Literal('LP')('gerber-command')
    cmd_polarity_opt_dark = pyp.Literal('D').setParseAction(
        pyp.replaceWith('dark'))
    cmd_polarity_opt_clear = pyp.Literal('C').setParseAction(
        pyp.replaceWith('clear'))

    cmd_linear_int = pyp.Literal('G01').suppress()  # linear interpolation

    # NOTE(review): the circle branch requires a trailing 'X' after the
    # diameter -- confirm this matches the files being parsed.
    aperture_type = (
        ((cmd_ap_def_opt_circ('type') + comma)
         + (floatnum)('diameter') + 'X')
        | ((cmd_ap_def_opt_rect('type') + comma)
           + (floatnum)('width') + 'X' + (floatnum)('height')))

    polarity_type = (cmd_polarity_opt_clear
                     | cmd_polarity_opt_dark)('polarity')

    units_type = (cmd_units_opt_mm | cmd_units_opt_inch)('units')

    format_zeros = ((cmd_format_opt_leading_zeros('zeros'))
                    | (cmd_format_opt_trailing_zeros('zeros')))

    format_notation = ((cmd_format_opt_absolute('notation'))
                       | (cmd_format_opt_incremental('notation')))

    format_data = (single_digit)('integer') + single_digit('decimal')

    # comments (suppress)
    comment = (cmd_comment
               + pyp.Optional(space)
               + pyp.Regex(r"([^\*]+)?")
               + pyp.Optional(space)
               + inst_end).suppress()

    units = (inst_del
             + pyp.Group(cmd_units + units_type)('units')
             + inst_end
             + inst_del)

    gformat = (inst_del
               + pyp.Group(cmd_format
                           + format_zeros
                           + format_notation
                           + 'X' + pyp.Group(format_data)('x')
                           + 'Y' + pyp.Group(format_data)('y'))('format')
               + inst_end
               + inst_del)

    ap_def = (inst_del
              + pyp.Group(cmd_ap_def
                          + cmd_ap_def_num
                          + aperture_type)('aperture_definition')
              + inst_end
              + inst_del)

    polarity = (inst_del
                + pyp.Group(cmd_polarity
                            + polarity_type)('polarity_change')
                + inst_end
                + inst_del)

    closed_shape_start = (cmd_closed_shape_start('start_closed_shape')
                          + inst_end)
    closed_shape_end = (cmd_closed_shape_end('end_closed_shape')
                        + inst_end)

    def coord_op(d_code, name):
        # Optional G01 prefix, integer X/Y coordinates, then the D-code
        # that selects the operation; the whole group is named `name`.
        return pyp.Group(pyp.Optional(cmd_linear_int)
                         + 'X' + (integer)('x')
                         + 'Y' + (integer)('y')
                         + pyp.Literal(d_code).suppress()
                         + inst_end)(name)

    draw = coord_op('D01', 'draw')
    move = coord_op('D02', 'move')
    flash = coord_op('D03', 'flash')

    aperture_change = (pyp.Literal('D').suppress()
                       + pyp.Group(integer('number')
                                   + inst_end)('aperture_change'))

    # end of file (suppress)
    the_end = (pyp.Literal('M02') + inst_end)('end_of_gerber')

    grammar = (comment
               | units
               | gformat
               | ap_def
               | aperture_change
               | draw
               | move
               | flash
               | polarity
               | closed_shape_start
               | closed_shape_end
               | the_end)

    return pyp.OneOrMore(pyp.Group(grammar))
from snakeoil import mappings from pkgcore.config import dhcpformat, errors _section_contents = pyp.Forward() _value = pyp.Word(pyp.alphanums + "./_").setWhitespaceChars(" \t") | pyp.quotedString.copy().setParseAction( pyp.removeQuotes ) _section = pyp.Group(pyp.Suppress("{" + pyp.lineEnd) + _section_contents + pyp.Suppress("}")) # "statement seems to have no effect" # pylint: disable-msg=W0104 _section_contents << pyp.dictOf( _value + pyp.Suppress("="), pyp.Group(pyp.OneOrMore((_value | _section).setWhitespaceChars(" \t"))) + pyp.Suppress(pyp.lineEnd), ) parser = ( pyp.stringStart + pyp.dictOf(pyp.Suppress("[") + _value + pyp.Suppress("]" + pyp.lineEnd), _section_contents).ignore( pyp.pythonStyleComment ) + pyp.stringEnd ) def config_from_file(file_obj): try: config = parser.parseFile(file_obj) except pyp.ParseException as e:
) + state_conjunction_implicit + Literal(')').suppress() state = state_conjunction_explicit | state_conjunction_implicit function_arguments = Literal('(').suppress() + Group( OneOrMore(variable)) + Literal(')').suppress() expr = Literal('(').suppress() + Group( identifier + OneOrMore(variable)) + Literal(')').suppress() notted_expr = Literal('(').suppress() + Keyword('not') + expr + Literal( ')').suppress() expr_set = Literal('(').suppress() + OneOrMore(expr) + Literal(')').suppress() init = Literal(':').suppress() + Keyword('init') # (:requirements :strips) goal = Literal(':').suppress() + Keyword('goal') # (:requirements :typing) init_goal_states = dictOf( Literal('(').suppress() + (init | goal), state + Literal(')').suppress() + comment) s = r'''(:init (S B B) (S C B) (S A C) (R B B) (R C B)) (:goal (and (S A A)))''' print('Input strips string:') print(s) parsed_states = init_goal_states.parseString(s) print('parsed init state:') print(parsed_states.asDict()) problem_name = (Literal('(') + Keyword('problem') ).suppress() + identifier + Literal(')').suppress() problem_domain = (Literal('(') + Keyword(':domain') ).suppress() + identifier + Literal(')').suppress()
def create_parser():
    """Build the pyparsing expression that matches one STOMP frame.

    The expression implements the BNF from the STOMP specification:

    NULL = <US-ASCII null (octet 0)>
    LF = <US-ASCII line feed (aka newline) (octet 10)>
    CR = <US-ASCII carriage return (octet 13)>
    EOL = [CR] LF
    OCTET = <any 8-bit sequence of data>

    frame-stream = 1*frame

    frame = command EOL
            *( header EOL )
            EOL
            *OCTET
            NULL
            *( EOL )

    command = client-command | server-command

    client-command = "SEND" | "SUBSCRIBE" | "UNSUBSCRIBE" | "BEGIN"
                     | "COMMIT" | "ABORT" | "ACK" | "NACK"
                     | "DISCONNECT" | "CONNECT" | "STOMP"

    server-command = "CONNECTED" | "MESSAGE" | "RECEIPT" | "ERROR"

    header = header-name ":" header-value
    header-name = 1*<any OCTET except CR or LF or ":">
    header-value = *<any OCTET except CR or LF or ":">

    Side effect: overrides pyparsing's default whitespace characters with
    the empty string via ``ParserElement.setDefaultWhitespaceChars``.

    Returns:
        The combined pyparsing expression: start of string, command,
        headers (as a ``dictOf``), a blank line, then the body skipped up
        to the terminating NULL.
    """
    # The frame format is whitespace-sensitive, so nothing may be skipped
    # implicitly.
    ParserElement.setDefaultWhitespaceChars('')

    delimiters = '\0\r\n:'
    null, cr, lf, colon = [Suppress(char) for char in delimiters]
    end_of_line = Optional(cr) + lf

    command = oneOf(CLIENT_COMMANDS) ^ oneOf(SERVER_COMMANDS)

    # Header names and values may contain anything except CR, LF and ":"
    # (every delimiter but NULL); a value may also be empty.
    header_name = CharsNotIn(delimiters[1:])
    header_value = Optional(header_name)
    headers = dictOf(header_name + colon, header_value + end_of_line)

    # The body is everything before the NULL that ends the frame.
    frame_body = SkipTo(null + ZeroOrMore(end_of_line) + StringEnd())

    return (
        StringStart()
        + command + end_of_line
        + headers + end_of_line
        + frame_body
    )
# hash reaction entry hashed_reaction_section = Group(hashsymbol + Suppress(define_reactions_) + lbrace + OneOrMore(reaction_definition) + rbrace) # hash observable entry count_definition = Group(count_ + lbracket + species_definition.setResultsName('speciesPattern') + Suppress(',') + identifier + rbracket) observable_entry = Group(lbrace + Group(delimitedList(count_definition, delim='+')).setResultsName('patterns') + rbrace + Suppress('=>') + quotedString.setResultsName('outputfile')) bngobservable_entry = Group((species_ | molecules_).setResultsName('obskey') + identifier.setResultsName('obsname') + Group(delimitedList(species_definition, delim=',')).setResultsName('obspatterns')) hashed_observable_section = Group(hashsymbol + Suppress(reaction_data_output_) + lbrace + OneOrMore(statement).setResultsName('statements') + ZeroOrMore(observable_entry).setResultsName('mdlobs') + ZeroOrMore(bngobservable_entry).setResultsName('bngobs') + rbrace) # hash initialization entry key = identifier + Suppress('=') value = restOfLine release_site_definition = Group(identifier.setResultsName('name') + release_site_ + lbrace + dictOf(key,value).setResultsName('entries') + rbrace) object_definition = Group(identifier.setResultsName('compartmentName') + Suppress(object_) + (bracketidentifier | identifier) + (nestedExpr('{', '}',content=statement)).setResultsName('compartmentOptions')) hashed_initialization_section = Group(hashsymbol + Suppress(instantiate_) + identifier.setResultsName('name') + identifier.setResultsName('type') + lbrace + Group(ZeroOrMore(release_site_definition | object_definition)).setResultsName('entries') + rbrace ) other_sections = section_enclosure_ #statement = Group(identifier + equal + (quotedString | OneOrMore(mathElements))) + Suppress(LineEnd() | StringEnd()) grammar = ZeroOrMore(Suppress(other_sections) | Suppress(statement) | hashed_system_constants.setResultsName('systemConstants') | hashed_molecule_section.setResultsName('molecules') | 
hashed_reaction_section.setResultsName('reactions') | hashed_observable_section.setResultsName('observables') | hashed_initialization_section.setResultsName('initialization') | hashed_function_section.setResultsName('math_functions') #| Suppress(hashed_section) ) nonhashedgrammar = ZeroOrMore(Suppress(statement) | Suppress(hashed_section) | Dict(other_sections))
from __future__ import (absolute_import, division, print_function, unicode_literals)
# noinspection PyCompatibility
from builtins import *

import pyparsing as pp

# ---------------------------------------------------------------------------
# Header section
# ---------------------------------------------------------------------------

# Double-quoted string with backslash escapes; results come back unquoted.
quoted_string = pp.QuotedString('"', escChar='\\', unquoteResults=True)

# Integer or floating-point literal; the matched text is replaced by a float.
number = pp.Regex('[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?')
number.setParseAction(lambda toks: [float(toks[0])])

# A property value is a quoted string or, failing that, a number.
prop_value = pp.MatchFirst([quoted_string, number])

# The property dictionary is a run of quoted-name / value pairs.
prop_dict = pp.dictOf(quoted_string, prop_value)

# The header section is the word HEADER (dropped from the results)
# followed by the properties.
header_section = pp.Suppress(pp.Literal("HEADER")) + prop_dict

# ---------------------------------------------------------------------------
# Type section
# ---------------------------------------------------------------------------

# Type length: '*' for variable length, otherwise DOUBLE or SINGLE.
type_length = pp.MatchFirst([
    pp.Literal('*'),
    pp.Literal('DOUBLE'),
    pp.Literal('SINGLE'),
])

# Type name: STRING or FLOAT.
type_name = pp.MatchFirst([
    pp.Literal('STRING'),
    pp.Literal('FLOAT'),
])

# The literal sequence "ARRAY ( * )" marks an array type; the four tokens
# are collapsed into a single True.
is_array = (
    pp.Literal('ARRAY')
    + pp.Literal('(')
    + pp.Literal('*')
    + pp.Literal(')')
)
is_array.setParseAction(lambda toks: [True])

# A type definition: optional array marker (False when absent), then the
# type name and the type length, each under its own results name.
type_info = (
    pp.Optional(is_array, default=False).setResultsName('is_array')
    + type_name.setResultsName('type_name')
    + type_length.setResultsName('type_len')
)
import pyparsing as pp

# "key: value" lines. The key is an alphanumeric word at the start of a
# line; the value is the rest of that line. Pairs are collected by dictOf.
key = pp.Suppress(pp.LineStart()) + pp.Word(pp.alphanums) + pp.Suppress(':')
value = pp.restOfLine + pp.Suppress(pp.LineEnd())
kvParser = pp.dictOf(key, value)

# A session header is a line of the form "[one or more words]".
session_name = pp.OneOrMore(pp.Word(pp.alphanums))
session = (
    pp.Suppress(pp.LineStart())
    + pp.Suppress('[')
    + session_name
    + pp.Suppress(']')
    + pp.Suppress(pp.LineEnd())
)

zeroOrMoreEmptyLines = pp.ZeroOrMore(pp.Suppress(pp.LineEnd()))

# A content line is everything up to the end of the line, refusing to run
# into the next session header (failOn=session).
line = (
    pp.LineStart()
    + pp.SkipTo(pp.LineEnd(), failOn=session)
    + pp.Suppress(pp.LineEnd())
)
lines = pp.Group(pp.ZeroOrMore(line.leaveWhitespace()))

# Content maps each session header to the group of lines that follow it.
contentParser = pp.dictOf(session, lines)

# Whole document: [Metadata] pairs, then [Tags] pairs, then the sessions.
Parser = (
    pp.Suppress(pp.Keyword('[Metadata]'))
    + pp.Suppress(pp.LineEnd())
    + kvParser('Metadata')
    + pp.Suppress(pp.Keyword('[Tags]'))
    + pp.Suppress(pp.LineEnd())
    + kvParser('Tags')
    + pp.Group(contentParser)('Content')
)
# Right-hand side of a condition: a bare word (letters, digits and "-_.*"),
# a double-quoted string, or a number, tried in that order.
value = (
    pp.Word(pp.alphanums + '-_.*')
    | pp.QuotedString('"')
    | number
)('value')

# list of numbers: "[n, n, ...]" combined into a single token
nl = pp.delimitedList(number, combine=True)
narr = pp.Combine('[' + nl + ']')

# function arguments: numbers, bare words that are NOT followed by "="
# (those belong to the keyword arguments), or number lists
arglist = pp.delimitedList(
    number
    | (pp.Word(pp.alphanums + '-_') + pp.NotAny('='))
    | narr
)
args = pp.Group(arglist)('args')

# function keyword arguments: "name=value" pairs collected by dictOf
key = pp.Word(pp.alphas) + pp.Suppress('=')
values = number | pp.Word(pp.alphas)
keyval = pp.dictOf(key, values)
kwarglist = pp.delimitedList(keyval)
kwargs = pp.Group(kwarglist)('kwargs')

# build generic function: name ( [args] [kwargs] )
fxn_args = pp.Optional(args) + pp.Optional(kwargs)
fxn_name = pp.Word(pp.alphas)('fxn')
fxn = pp.Group(fxn_name + LPAR + fxn_args + RPAR)

# overall (recursive) where clause
whereexp = pp.Forward()

# condition: parameter, operator, value; handed to Condition as parse action
condition = pp.Group(name + operator + value)('condition').setParseAction(Condition)

# between condition
_parse_type("str)")) string_value = quoted_string | unquoted_string single_value = bool_value | float_value | int_value | string_value list_value = pp.Group( pp.Suppress("[") + pp.Optional(pp.delimitedList(single_value)) + pp.Suppress("]")).setParseAction(_parse_type("list")) # because this is a recursive construct, a dict can contain dicts in values dict_value = pp.Forward() value = list_value ^ single_value ^ dict_value dict_key_value = pp.dictOf(string_value + pp.Suppress(":"), value) dict_value << pp.Group( pp.Suppress("{") + pp.delimitedList(dict_key_value) + pp.Suppress("}")).setParseAction(_parse_type("dict")) # Positionals must be end of line or has a space (or more) afterwards. # This is to ensure that the parser treats text like "something=" as invalid # instead of parsing this as positional "something" and leaving the "=" as # invalid on its own. positionals = pp.ZeroOrMore(value + ( pp.StringEnd() ^ pp.Suppress(pp.OneOrMore(pp.White())))).setResultsName("positionals") key_value = pp.ZeroOrMore( pp.dictOf(identifier + pp.Suppress("="), value).setResultsName("kv"))
# Parameter name: starts with a letter, "." or "_", continues with
# letters, digits, "." or "_".
name = pp.Word(pp.alphas + '._', pp.alphanums + '._')('parameter')

# Comparison / logical operator. Earlier regex-based variant, kept for
# reference:
#operator = pp.Regex("==|!=|<=|>=|<|>|=|&|~|||").setResultsName('operator')
operator = pp.oneOf(
    ['==', '<=', '<', '>', '>=', '=', '!=', '&', '|']
)('operator')

# Right-hand side of a condition: a bare word (letters, digits and "-_.*"),
# a double-quoted string, or a number, tried in that order.
value = (
    pp.Word(pp.alphanums + '-_.*')
    | pp.QuotedString('"')
    | number
)('value')

# list of numbers: "[n, n, ...]" combined into a single token
nl = pp.delimitedList(number, combine=True)
narr = pp.Combine('[' + nl + ']')

# function arguments: numbers, bare words that are NOT followed by "="
# (those belong to the keyword arguments), or number lists
arglist = pp.delimitedList(
    number
    | (pp.Word(pp.alphanums + '-_') + pp.NotAny('='))
    | narr
)
args = pp.Group(arglist)('args')

# function keyword arguments: "name=value" pairs collected by dictOf
key = pp.Word(pp.alphas) + pp.Suppress('=')
values = number | pp.Word(pp.alphas)
keyval = pp.dictOf(key, values)
kwarglist = pp.delimitedList(keyval)
kwargs = pp.Group(kwarglist)('kwargs')

# build generic function: name ( [args] [kwargs] )
fxn_args = pp.Optional(args) + pp.Optional(kwargs)
fxn_name = pp.Word(pp.alphas)('fxn')
fxn = pp.Group(fxn_name + LPAR + fxn_args + RPAR)

# overall (recursive) where clause
whereexp = pp.Forward()

# condition: parameter, operator, value; handed to Condition as parse action
condition = pp.Group(name + operator + value)('condition').setParseAction(Condition)

# between condition
""" __all__ = ("config_from_file",) from pkgcore.config import basics, errors from snakeoil import mappings, modules import pyparsing as pyp # this is based on the 'BIND named.conf parser' on pyparsing's webpage _section = pyp.Forward() _value = (pyp.Word(pyp.alphanums + './_') | pyp.quotedString.copy().setParseAction(pyp.removeQuotes)) _section_contents = pyp.dictOf( _value, pyp.Group(pyp.OneOrMore(_value | _section)) + pyp.Suppress(';')) # "statement seems to have no effect" # pylint: disable-msg=W0104 _section << pyp.Group(pyp.Suppress('{') + _section_contents + pyp.Suppress('}')) parser = ( pyp.stringStart + pyp.dictOf(_value, _section).ignore(pyp.pythonStyleComment) + pyp.stringEnd) class ConfigSection(basics.ConfigSection): """Expose a section_contents from pyparsing as a ConfigSection.
def __init__(self, bus):
    """Initialise the plugin and build its two pyparsing grammars.

    Sets ``self.appengine_grammar`` (fields of an App Engine combined log
    line, in order) and ``self.logquery_grammar`` (a "|"-delimited list of
    query terms, each dispatched to a ``self.log_query_*`` parse action).

    :param bus: passed straight through to ``SimplePlugin.__init__``.
    """
    cherrypy.process.plugins.SimplePlugin.__init__(self, bus)

    # Parsing primitives
    integer = pp.Word(pp.nums)
    ipv4 = pp.Combine(
        integer + "." + integer + "." + integer + "." + integer
    )
    # Loose match: any run of alphanumerics and ":".
    ipv6 = pp.Word(pp.alphanums + ":")
    # Three characters: one uppercase letter then lowercase letters.
    month3 = pp.Word(
        string.ascii_uppercase,
        string.ascii_lowercase,
        exact=3
    )
    # yyyy-mm-dd (dashes suppressed)
    date10 = pp.Group(
        pp.Word(pp.nums, exact=4) +
        pp.Suppress("-") + pp.Word(pp.nums, exact=2) +
        pp.Suppress("-") + pp.Word(pp.nums, exact=2)
    )
    # yyyy-mm (dash suppressed)
    date7 = pp.Group(
        pp.Word(pp.nums, exact=4) +
        pp.Suppress("-") + pp.Word(pp.nums, exact=2)
    )
    tzoffset = pp.Word("+-", pp.nums)
    # "[dd/Mon/yyyy:hh:mm:ss +zzzz]" with the brackets suppressed and the
    # inner text combined into a single token.
    timestamp = pp.Group(
        pp.Suppress("[") +
        pp.Combine(
            integer + "/" + month3 + "/" + integer +
            ":" + integer + ":" + integer + ":" + integer +
            " " + tzoffset
        ) +
        pp.Suppress("]")
    )
    optional_not = pp.Optional(pp.Literal("not"))

    # Appengine Combined Log Grammar
    # Field order is documented at:
    # https://cloud.google.com/appengine/docs/python/logs/
    #
    # This is heavily based on:
    # http://pyparsing.wikispaces.com/file/view/httpServerLogParser.py/30166005/httpServerLogParser.py
    #
    # NOTE(review): several fields below call setParseAction directly on
    # the module-level pp.dblQuotedString (no .copy()). setParseAction
    # changes the element it is called on, so these calls presumably
    # affect each other and any other user of pp.dblQuotedString; the
    # resulting behaviour depends on statement order. Confirm before
    # reordering or refactoring.

    # ip
    appengine_fields = (ipv4 | ipv6).setResultsName("ip")

    # ident
    appengine_fields += pp.Suppress("-")

    # auth
    appengine_fields += (
        "-" | pp.dblQuotedString | pp.Word(pp.alphanums + "@._")
    ).setParseAction(self.dash_to_none)

    # timestamp
    # NOTE(review): the results name "timestamp" is applied twice; the
    # first application looks redundant -- confirm.
    appengine_fields += timestamp.setResultsName(
        "timestamp"
    ).setParseAction(
        self.first_in_group
    ).setResultsName("timestamp")

    # cmd
    appengine_fields += pp.dblQuotedString.setParseAction(
        self.request_fields
    ).setResultsName("cmd")

    # status
    appengine_fields += ("-" | integer).setParseAction(
        self.dash_to_none
    ).setResultsName("statusCode")

    # bytes sent
    appengine_fields += ("-" | integer).setParseAction(
        self.dash_to_none
    ).setResultsName("numBytesSent")

    # referrer
    appengine_fields += ("-" | pp.dblQuotedString).setParseAction(
        pp.removeQuotes,
        self.dash_to_none
    ).setResultsName("referrer")

    # agent
    appengine_fields += ("-" | pp.dblQuotedString).setParseAction(
        pp.removeQuotes,
        self.dash_to_none
    ).setResultsName("agent")

    # host
    appengine_fields += pp.Optional(
        pp.dblQuotedString.setParseAction(pp.removeQuotes)
    ).setResultsName("host")

    # extras: optional trailing key="value" pairs, passed to self.to_dict
    appengine_fields += pp.Optional(
        pp.dictOf(
            pp.Word(pp.alphanums + "_") + pp.Suppress("="),
            pp.dblQuotedString
        ).setParseAction(
            self.to_dict
        ).setResultsName("extras")
    )

    self.appengine_grammar = appengine_fields

    # Custom grammar for querying the logindex database
    # Converts a list key-value pairs to SQL
    #
    # Terms are separated by "|"; each term is matched with pp.Or over the
    # alternatives below.
    # NOTE(review): "uri" is listed both in the url rule and in the string
    # fields rule, and "classification" appears twice in the string fields
    # list -- confirm whether that is intended.
    self.logquery_grammar = pp.delimitedList(
        pp.Or([
            # relative date
            (
                pp.Literal("date") +
                pp.oneOf("today yesterday")
            ).setParseAction(self.log_query_relative_date),

            # absolute date in yyyy-mm-dd or yyyy-mm format
            (
                pp.Literal("date") + pp.OneOrMore(date10 | date7)
            ).setParseAction(self.log_query_absolute_date),

            # numeric fields
            (
                pp.oneOf("statusCode") +
                optional_not +
                pp.OneOrMore(integer)
            ).setParseAction(self.log_query_numeric),

            # url
            (
                pp.oneOf(""" uri ip """) +
                optional_not +
                pp.OneOrMore(pp.Word(pp.alphanums + "%/-.:"))
            ).setParseAction(self.log_query_wildcard),

            # string fields
            (
                pp.oneOf(""" city country region classification method cookie uri agent_domain classification referrer_domain""") +
                optional_not +
                pp.OneOrMore(pp.Word(pp.alphanums + ".-"))
            ).setParseAction(self.log_query_exact_string),

            # string field involving a subquery
            (
                pp.oneOf(""" reverse_domain """) +
                pp.OneOrMore(pp.Word(pp.alphanums + ".-"))
            ).setParseAction(self.log_query_subquery),
        ]),
        "|"
    )
from HTMLParser import HTMLParser
from urllib import unquote

# parse dict-like syntax
from pyparsing import (Suppress, Regex, quotedString, Word, alphas, Group,
                       alphanums, oneOf, Forward, Optional, dictOf,
                       delimitedList, removeQuotes)

# Punctuation is matched but dropped from the results.
LBRACK, RBRACK, LBRACE, RBRACE, COLON, COMMA = map(Suppress, "[]{}:,")

# Numbers are converted to int / float as they are parsed.
integer = Regex(r"[+-]?\d+").setParseAction(lambda t: int(t[0]))
real = Regex(r"[+-]?\d+\.\d*").setParseAction(lambda t: float(t[0]))

# A string is a bare identifier or a quoted string with its quotes removed.
# The parse action is attached to a *copy* of quotedString: calling
# setParseAction on pyparsing's shared module-level quotedString would
# change it for every other user of that object in the process.
string_ = Word(alphas, alphanums + "_") | quotedString.copy().setParseAction(removeQuotes)

# NOTE(review): in jsParser below, string_ is tried before bool_, and
# string_'s Word(alphas, ...) matches the bare words "true" / "false", so
# this conversion to a Python bool looks unreachable for unquoted input.
# Left as-is because reordering would change the type of returned values;
# confirm the intended behaviour.
bool_ = oneOf("true false").setParseAction(lambda t: t[0] == "true")

jsParser = Forward()
key = string_
# "-" (instead of "+") after the opening bracket stops backtracking once
# the bracket has matched, so malformed content is reported as an error.
dict_ = LBRACE - Optional(dictOf(key + COLON, jsParser + Optional(COMMA))) + RBRACE
list_ = LBRACK - Optional(delimitedList(jsParser)) + RBRACK
jsParser << (real | integer | string_ | bool_ | Group(list_ | dict_))


class WebParser(HTMLParser):
    """HTMLParser subclass that keeps a stack of the currently open tags."""

    def __init__(self):
        HTMLParser.__init__(self)
        self.stk = []      # names of the tags opened so far, innermost last
        self.result = {}

    def handle_starttag(self, tag, attrs):
        # Push every opening tag; attributes are ignored.
        self.stk.append(tag)

    def handle_endtag(self, tag):
        # Pop the innermost tag (no check that it matches `tag`).
        self.stk.pop()

    def handle_data(self, data):
        # NOTE(review): `b` is not used in the portion of this method that
        # is visible here.
        b = 'var hClientFlashVars ='
        # Ignore text unless the innermost open tag is <script>.
        if len(self.stk) and not self.stk[-1:][0].lower() == 'script':
            return
# An enclosed value: a comma-separated list of numbers, quoted strings and
# nested parenthesised / bracketed groups, or a single nested group.
_enclosed << (
    pp.delimitedList(
        pp.pyparsing_common.number
        | pp.QuotedString('"')
        | pp.QuotedString("'")
        | _nestedParens
        | _nestedBrackets
    )
    | _nestedParens
    | _nestedBrackets
)

# Keyword-argument name followed by "=" (the "=" is dropped).
_key = pp.Word(pp.alphas) + pp.Suppress("=")

# A keyword value ends at a comma, or else everything up to ")" is skipped.
_dict_value = _enclosed + (pp.Suppress(",") | pp.SkipTo(")").suppress())

# Positional arguments end at a comma, at the next keyword, or at ")".
_args = pp.Optional(pp.delimitedList(_enclosed))("args") + (
    pp.Suppress(",")
    | pp.SkipTo(_key).suppress()
    | pp.SkipTo(")").suppress()
)
_kwargs = pp.Optional(pp.dictOf(_key, _dict_value))("kwargs")

_parameters = (
    pp.Suppress("(").leaveWhitespace()
    + _args
    + _kwargs
    + pp.Suppress(")")
)

# "::name(parameters)"; every matched name is kept (listAllMatches=True).
_function = (
    pp.Suppress("::")
    + pp.Word(pp.alphas).leaveWhitespace().setResultsName("name", listAllMatches=True)
    + _parameters
)


def options(elem, doc, **kwargs):
    """Return ``(None, kwargs)``; ``elem`` and ``doc`` are not used."""
    result = (None, kwargs)
    return result


def box(elem, doc, repeat):
    """Return ``"[]"`` repeated ``repeat`` times, paired with ``None``.

    ``elem`` and ``doc`` are not used.
    """
    boxes = "[]" * repeat
    return boxes, None