def define_molecule_parser(self):
    """Define the parsers used for BNG molecules.

    Builds on ``self.parsers.base_name`` and
    ``self.parsers.combined_components_parser`` and stores the following
    elements on ``self.parsers``:

    * ``tag`` -- ``%`` followed by a base name or a number.
    * ``compartment`` -- ``@`` followed by a base name.
    * ``molecule`` -- name, optional tag/compartment, parenthesised
      (optional) components, optional tag/compartment.
    * ``molecules_parser`` -- ``.``-separated molecules, one token each.
    * ``combined_molecules_parser`` -- the same list combined into one token.
    """
    # Single-character markers are literals: pp.Word("%") would also accept
    # repeated characters such as "%%" or "((".
    # molecules can have tags
    self.parsers.tag = pp.Combine(
        pp.Literal("%") + (self.parsers.base_name ^ pp.Word(pp.nums))
    )
    # and compartments
    self.parsers.compartment = pp.Combine(
        pp.Literal("@") + self.parsers.base_name
    )
    # combine tag and compartment, accepted in either order
    tag_comp = (
        pp.Optional(self.parsers.tag) + pp.Optional(self.parsers.compartment)
    ) ^ (
        pp.Optional(self.parsers.compartment) + pp.Optional(self.parsers.tag)
    )
    # full molecule
    self.parsers.molecule = (
        self.parsers.base_name
        + tag_comp
        + pp.Literal("(")
        + pp.Optional(self.parsers.combined_components_parser)
        + pp.Literal(")")
        + tag_comp
    )
    molecule_parser = pp.Combine(self.parsers.molecule)
    # molecules are separated by periods
    molecule_separator = pp.Literal(".")
    self.parsers.molecules_parser = pp.delimited_list(
        molecule_parser, delim=molecule_separator
    )
    self.parsers.combined_molecules_parser = pp.delimited_list(
        molecule_parser, delim=molecule_separator, combine=True
    )
def __init__(self):
    """Build the basic numeric and identifier parser elements.

    Attributes set on the instance:

    * ``digits`` / ``unsigned_int`` -- a run of decimal digits (both names
      refer to the same element).
    * ``plus_or_minus``, ``opt_plus_minus``, ``mul_or_div``, ``point``,
      ``left_par``, ``right_par`` -- operator and punctuation tokens.
    * ``signed_int`` -- sign followed by digits, as one string token.
    * ``opt_signed_int`` -- optionally signed integer, converted to ``int``.
    * ``float_num`` -- text of a float: ``1.``, ``1.5``, ``.5`` plus an
      optional ``e<int>`` exponent.
    * ``real_num_pos`` / ``real_num`` -- unsigned / optionally signed
      number, converted to ``float`` or ``int``.
    * ``variable_name`` -- letter or underscore followed by letters,
      digits or underscores.
    """
    # Private building blocks for the *text* of a float.  They never carry
    # a parse action, so Combine() always joins exactly the characters that
    # were matched.  (An int-converting digit element would turn the
    # fractional part "05" into "5" and "1.05" would become 1.5.)
    raw_digits = pp.Word(pp.nums)
    # A literal dot: Word(".") would also accept "..".
    dot = pp.Literal(".")

    self.digits = pp.Word(pp.nums)
    self.plus_or_minus = pp.oneOf("+ -")
    self.opt_plus_minus = pp.Optional(self.plus_or_minus)
    self.mul_or_div = pp.oneOf("* /")
    self.point = pp.Literal(".")
    self.left_par = pp.Literal("(")
    self.right_par = pp.Literal(")")
    self.unsigned_int = self.digits
    self.signed_int = pp.Combine(self.plus_or_minus + self.unsigned_int)
    self.opt_signed_int = pp.Combine(
        self.opt_plus_minus + self.unsigned_int
    ).setParseAction(lambda el: int(el[0]))
    self.float_num = (
        (raw_digits + dot + pp.Optional(raw_digits)) ^ (dot + raw_digits)
    ) + pp.Optional(pp.CaselessLiteral("e") + self.opt_signed_int)
    # NOTE: setParseAction works in place, so from here on
    # self.unsigned_int (and therefore self.digits) yields an int.  This is
    # kept as-is for backward compatibility.
    self.real_num_pos = (
        pp.Combine(self.float_num).setParseAction(lambda el: float(el[0]))
        ^ self.unsigned_int.setParseAction(lambda el: int(el[0]))
    )
    self.real_num = (
        pp.Combine(self.opt_plus_minus + self.float_num).setParseAction(
            lambda el: float(el[0])
        )
        ^ self.opt_signed_int
    )
    self.variable_name = pp.Word(pp.alphas + "_", pp.alphas + pp.nums + "_")
def __init__(self):
    """Build the log-line grammar and store it as ``self.logLine``.

    A line is a ``<digits>`` priority (results name ``pri``) followed by
    one or more ``name=value`` assignments (results name ``fields``).  A
    value is either a quoted string or alphanumeric runs joined by
    special characters.
    """
    priority = pp.Combine(
        pp.Suppress('<') + pp.Word(pp.nums) + pp.Suppress('>')
    )
    # All special chars but space and double quotes.  The backslash is
    # written as "\\" so the literal contains no invalid escape sequence;
    # the resulting character set is unchanged.
    separator = pp.Word("!#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")
    obj_name = pp.Combine(
        pp.Word(pp.alphanums)
        + pp.ZeroOrMore(separator + pp.Word(pp.alphanums))
    )
    value = pp.quotedString | obj_name
    assignment = pp.Combine(pp.Word(pp.alphas) + "=" + value)
    self.logLine = priority("pri") + pp.OneOrMore(assignment)("fields")
def get_expression_parser(): sign = pp.Optional(pp.Literal('-')) number = pp.Word(pp.nums) dpoint = pp.Literal('.') ignore_errors = pp.CaselessLiteral(IGNORE_ERRORS) all_envs = pp.CaselessLiteral(ALL_ENVS) eq, neq = pp.Literal(EQUAL), pp.Literal(NOT_EQUAL) eand, eor = pp.CaselessLiteral(AND), pp.CaselessLiteral(OR) option = (ignore_errors | all_envs).setParseAction(_tag_with(_OPTION)) options = pp.Group(pp.ZeroOrMore(option)) operator_test = (eq | neq).setParseAction(_tag_with(TEST)) operator_logical = (eand | eor).setParseAction(_tag_with(_LOGICAL)) begin_if = pp.CaselessLiteral(_IF).setParseAction(_tag_with(_IF)) obj = pp.Word(pp.printables).setParseAction(_tag_with(_OBJ)) integer = pp.Combine(sign + number + pp.WordEnd()).setParseAction( _tag_with(_OBJ, int)) real = pp.Combine(sign + ((number + dpoint + number) | (dpoint + number) | (number + dpoint))).setParseAction( _tag_with(_OBJ, float)) expritem = integer | real | obj single_test = expritem + operator_test + expritem additional_test = operator_logical + single_test expr_var = pp.Group(obj + s_end).setParseAction(_tag_with(VALUE)) expr_test = pp.Group(obj + begin_if + single_test + pp.ZeroOrMore(additional_test) + s_end).setParseAction(_tag_with(TEST)) expr_list_test = pp.Group(begin_if + single_test + pp.ZeroOrMore(additional_test) + s_end).setParseAction(_tag_with(LIST_TEST)) expr = expr_test | expr_var | expr_list_test line = options + expr + s_end return line
def parse(s):                                                   # {{{1
    """Parse a Factor' program.

    Returns the result of parsing the whole of ``s`` (``parseAll`` is
    enabled); the top-level results name is ``prog``.
    """
    # TODO: whitespace ???
    symbol_chars = P.alphanums + "!@#$%^&*()-_=+[]{}|<>,.?/'"   # TODO

    def kw(text):
        """Return a suppressed keyword element for ``text``."""
        return P.Suppress(P.Keyword(text))

    value = P.Forward()
    term = P.Forward()

    symbol = P.Word(symbol_chars)("sym")
    boolean = P.Combine(P.Keyword("t") | P.Keyword("f"))("bool")
    number = P.Word(P.nums)("num")                              # TODO
    string = P.Combine(
        P.Suppress(P.Literal('"'))
        + P.Regex(r'(?:[^"\n\r\\]|'
                  r'(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')
        + P.Suppress(P.Literal('"'))
    )("str")                                                    # TODO
    array = P.Group(
        kw("{") + P.ZeroOrMore(P.Group(value)) + kw("}")
    )("ary")
    value <<= boolean | number | string | array

    word = symbol("word")                       # not prim|bool
    quotation = P.Group(
        kw("[") + P.ZeroOrMore(~kw("]") + term)("body") + kw("]")
    )("quot")
    stack_effect = P.Group(
        kw("(")
        + P.ZeroOrMore(~kw("--") + symbol)("pre")
        + kw("--")
        + P.ZeroOrMore(~kw(")") + symbol)("post")
        + kw(")")
    )
    word_def = P.Group(
        kw(":")
        + word("name")
        + stack_effect("eff")
        + P.ZeroOrMore(term)("body")
        + kw(";")
    )("wdef")
    term <<= P.Group(quotation | value | word)  # or primitive

    program = P.ZeroOrMore(P.Group(word_def) | term)("prog")
    return program.parseString(s, True)
def __parse(self):
    """Scan ``self.lease_file`` for ``lease <ip> { ... }`` blocks.

    Each block may contain ``starts``, ``ends``, ``tstp``, ``tsfp``,
    ``hardware``, ``uid`` and ``binding`` statements.  Date references get
    an extra ``datetime`` entry holding the parsed ``datetime`` object.

    Returns:
        The lazy iterator produced by ``scanString`` over the file's text.
        The file itself is read completely and closed before returning.
    """
    lbrace, rbrace, semi = map(pyparsing.Suppress, '{};')
    digits = pyparsing.nums

    ip_address = pyparsing.Combine(
        pyparsing.Word(digits) + ('.' + pyparsing.Word(digits)) * 3)
    hex_int = pyparsing.Word(pyparsing.hexnums, exact=2)
    mac_address = pyparsing.Combine(hex_int + (':' + hex_int) * 5)
    hdw_type = pyparsing.Word(pyparsing.alphanums)
    yyyy_mm_dd = pyparsing.Combine(
        (pyparsing.Word(digits, exact=4) | pyparsing.Word(digits, exact=2))
        + ('/' + pyparsing.Word(digits, exact=2)) * 2)
    hh_mm_ss = pyparsing.Combine(
        pyparsing.Word(digits, exact=2)
        + (':' + pyparsing.Word(digits, exact=2)) * 2)
    date_ref = (pyparsing.oneOf(list("0123456"))("weekday")
                + yyyy_mm_dd("date")
                + hh_mm_ss("time"))

    def to_datetime(tokens):
        # The grammar accepts both 4- and 2-digit years, but "%Y" only
        # parses 4 digits; pick the directive that matches the input.
        year_text = tokens["date"].split("/")[0]
        year_directive = "%Y" if len(year_text) == 4 else "%y"
        tokens["datetime"] = datetime.strptime(
            "%(date)s %(time)s" % tokens,
            year_directive + "/%m/%d %H:%M:%S")

    date_ref.setParseAction(to_datetime)

    starts_stmt = "starts" + date_ref + semi
    ends_stmt = "ends" + (date_ref | "never") + semi
    tstp_stmt = "tstp" + date_ref + semi
    tsfp_stmt = "tsfp" + date_ref + semi
    hdw_stmt = "hardware" + hdw_type("type") + mac_address("mac") + semi
    uid_stmt = "uid" + pyparsing.QuotedString('"')("uid") + semi
    binding_stmt = ("binding"
                    + pyparsing.Word(pyparsing.alphanums)
                    + pyparsing.Word(pyparsing.alphanums)
                    + semi)

    lease_statement = (starts_stmt | ends_stmt | tstp_stmt | tsfp_stmt
                       | hdw_stmt | uid_stmt | binding_stmt)
    lease_def = (
        "lease" + ip_address("ipaddress") + lbrace
        + pyparsing.Dict(
            pyparsing.ZeroOrMore(pyparsing.Group(lease_statement)))
        + rbrace)

    with open(self.lease_file, 'r') as lease_fh:
        lease_text = lease_fh.read()
    return lease_def.scanString(lease_text)
def process(config):
    """Split a quoted ``'/a/b/@attr'``-style expression into its parts.

    The expression is a single-quoted string made of an optional path
    (``/segment`` repeated, followed by ``/``) and an attribute introduced
    by ``@`` or ``!``.

    Returns:
        A list holding the path (when present) followed by the attribute
        name (when present).
    """
    path_part = p.Optional(
        p.Combine(
            p.OneOrMore(p.Literal("/") + p.Word(p.alphanums))
            + p.Literal("/").suppress()
        )
    ).setResultsName('path')

    attrib_marker = p.Literal('@').suppress() | p.Literal('!').suppress()
    attrib_part = p.Combine(
        attrib_marker + p.Word(p.alphanums) + p.Literal("'").suppress()
    ).setResultsName('attrib')

    grammar = p.Group(
        p.Literal("'").suppress() + path_part + attrib_part
    ).setResultsName('search')
    parsed = grammar.parseString(config)

    found = []
    if 'search' not in parsed:
        return found

    # Collect the named parts in a fixed order: path first, then attribute.
    for key in ('path', 'attrib'):
        if key in parsed['search']:
            found.append(parsed['search'][key])
    return found
def __get_fields(data, fields):
    """Populate ``data`` from a field-selection expression and return it.

    ``fields`` is a comma-separated list of field names.  A name may carry
    a parenthesised sub-selection (``a(b,c)``), and either form may be
    ``*``-suffixed.  For every top-level entry ``data[name]`` becomes:

    * ``True`` when the entry ends with ``*``;
    * ``'*'`` when the sub-selection is exactly ``(*)``;
    * a dict filled recursively when there is another sub-selection;
    * an empty dict otherwise.

    Args:
        data: Mapping that is updated in place.
        fields: The selection expression to parse.

    Returns:
        The same ``data`` object.
    """
    left_par = pyparsing.Literal('(')
    right_par = pyparsing.Literal(')')
    field = pyparsing.Combine(
        pyparsing.Word(pyparsing.alphanums + '_') + pyparsing.Optional('*')
    ) | '*'
    nested_field = pyparsing.Forward()
    nested_field << pyparsing.Combine(
        field
        + left_par
        + pyparsing.Or(nested_field | field)
        + pyparsing.ZeroOrMore(
            pyparsing.Word(',') + pyparsing.Or(nested_field | field))
        + right_par
    )
    root_fields = pyparsing.Or(nested_field | field) + pyparsing.ZeroOrMore(
        pyparsing.Suppress(',') + pyparsing.Or(nested_field | field))

    # Raw strings keep the patterns free of invalid escape sequences, and
    # compiling once avoids redoing it for every parsed entry.
    entry_re = re.compile(r'([\w ]+|\*)([\w,*() ]+\))?(\*?)')
    parenthesised_re = re.compile(r'^\((.*)\)$')

    for parser in root_fields.parseString(fields).asList():
        match = entry_re.match(parser)
        name = match.group(1)
        include_all = match.group(2) == '(*)'
        recursive = match.group(3) == '*'
        if recursive:
            data[name] = True
        elif include_all:
            data[name] = '*'
        elif match.group(2) is not None:
            data[name] = {}
            sub_fields = parenthesised_re.search(match.group(2)).group(1)
            __get_fields(data[name], sub_fields)
        else:
            data[name] = {}
    return data
def grammar(cls):
    """Return the grammar for a number.

    The number is an optional ``+``/``-`` sign, a run of digits and an
    optional ``.digits`` part, all combined into a single token.
    """
    integer_part = pp.Word(pp.nums)
    optional_sign = pp.Optional(pp.Word('+-', exact=1))
    optional_fraction = pp.Optional(
        pp.Combine(pp.Word('.', exact=1) + integer_part)
    )
    return pp.Combine(optional_sign + integer_part + optional_fraction)
def _parse(self, expr):
    """Parse the arithmetic expression ``expr`` in full.

    ``self.exprStack`` is reset to an empty list first.  Operands and
    operators are handed to ``self._push`` and function argument lists to
    ``self._pushLen`` as parse actions while the expression is consumed.
    Nothing is returned.

    Supported atoms: function calls, fields (optionally prefixed with
    ``<article pk>:``; the default is ``self.article.pk``), double-quoted
    strings, numbers and parenthesised sub-expressions.  ``*`` and ``/``
    bind tighter than ``+`` and ``-``.
    """
    self.exprStack = []
    grammar = pp.Forward()

    def field_to_dict(strg, loc, st):
        return dict(article_pk=st[0], name=st[1])

    # function call: name(arg, arg, ...)
    call_args = pp.Group(
        pp.Optional(pp.delimitedList(grammar))
    ).setParseAction(self._pushLen)
    function_call = (
        fident
        + pp.Literal('(').suppress()
        + call_args
        + pp.Literal(')').suppress()
    )

    # field reference: [pk:]name
    field_ref = (
        pp.Optional(
            pp.Word(pp.nums) + pp.Literal(":").suppress(),
            default=self.article.pk,
        )
        + fident
    ).setParseAction(field_to_dict)

    operand = (
        function_call.setResultsName('fn')
        | field_ref.setResultsName('field')
        | pp.QuotedString('"').setResultsName('str')
        | fnumber.setResultsName('float')
    ).setParseAction(self._push)
    parenthesised = pp.Group(
        pp.Literal('(').suppress() + grammar + pp.Literal(')').suppress()
    )
    atom = operand | parenthesised

    mul_op = pp.Combine(
        pp.Literal("*") | pp.Literal("/")
    ).setResultsName('op')
    term = atom + pp.ZeroOrMore(
        (mul_op + atom).setParseAction(self._push)
    )

    add_op = pp.Combine(
        pp.Literal("+") | pp.Literal("-")
    ).setResultsName('op')
    grammar <<= term + pp.ZeroOrMore(
        (add_op + term).setParseAction(self._push)
    )

    grammar.parseString(expr, True)
def get_external_link(flag=False):
    """Build the parser element for an external link.

    Grammar::

        external_link = "[", url, [ space | tab, link_text ], "]";

    :param bool flag: toggle debug messages on/off
    :returns: external_link parser element
    :rtype: ParserElement
    :raises RuntimeError: if building the element fails for any reason
    """
    try:
        opening = pyparsing.Literal("[")
        url = _get_url(flag=flag)
        separator = pyparsing.Regex(r" |\t")
        link_text = _get_link_text(flag=flag)
        closing = pyparsing.Literal("]")

        # The label is optional and must follow a single space or tab.
        optional_label = pyparsing.Optional(
            pyparsing.Combine(separator + link_text)
        )
        external_link = pyparsing.Combine(
            opening + url + optional_label + closing
        )

        # Whitespace is significant inside the link.
        external_link.leaveWhitespace()
        external_link.parseWithTabs()
        if flag:
            external_link.setDebug()
        external_link.setName("external_link")
        external_link = external_link.setResultsName("external_link")
    except Exception as exception:
        msg = "failed to get external_link parser element:{}".format(exception)
        raise RuntimeError(msg)
    return external_link
def build_parser(self):
    """Build the search-query grammar and return its ``parseString``.

    Terms are tried in this order: ``word*`` (``wildcard``),
    ``name:value`` (``fields``), ``-word`` (``not_term``) and a plain
    word (``term``).  Terms can be wrapped in double quotes (``quotes``)
    or parentheses (``parenthesis``) and joined with ``and`` (explicit or
    by juxtaposition) and ``or``.
    """
    Group = pyparsing.Group
    Combine = pyparsing.Combine
    Word = pyparsing.Word
    Optional = pyparsing.Optional
    Suppress = pyparsing.Suppress
    Keyword = pyparsing.Keyword
    alphanums = pyparsing.alphanums

    # --- individual terms -------------------------------------------------
    wildcard = Group(
        Combine(Word(alphanums) + Suppress('*'))
    ).setResultsName('wildcard')

    # Value of a "name:value" term: optional quote/comparison prefix, a
    # word, then an optional second comparison and word.
    field_value = Group(
        Optional('"')
        + Optional("<") + Optional(">") + Optional("=") + Optional("-")
        + Word(alphanums + "._/")
        + Optional("&")
        + Optional("<") + Optional(">") + Optional("=") + Optional("-")
        + Optional(Word(alphanums + "._/"))
        + Optional('"')
    )
    fields = Group(
        Combine(Word(alphanums + "._") + Word(':') + field_value)
    ).setResultsName('fields')

    not_term = Group(
        Combine(Suppress('-') + Word(alphanums + "."))
    ).setResultsName('not_term')

    term = Group(Word(alphanums)).setResultsName('term')

    parsed_term = wildcard | fields | not_term | term

    # --- quoting and grouping ----------------------------------------------
    parsed_or = pyparsing.Forward()
    parsed_quote_block = pyparsing.Forward()
    parsed_quote_block << ((parsed_term + parsed_quote_block) | parsed_term)

    parsed_quote = Group(
        Suppress('"') + parsed_quote_block + Suppress('"')
    ).setResultsName("quotes") | parsed_term

    parsed_parenthesis = Group(
        (Suppress("(") + parsed_or + Suppress(")"))
    ).setResultsName("parenthesis") | parsed_quote

    # --- boolean operators ---------------------------------------------------
    parsed_and = pyparsing.Forward()
    explicit_and = Group(
        parsed_parenthesis + Suppress(Keyword("and")) + parsed_and
    ).setResultsName("and")
    implicit_and = Group(
        parsed_parenthesis
        + pyparsing.OneOrMore(~pyparsing.oneOf("or and") + parsed_and)
    ).setResultsName("and")
    parsed_and << (explicit_and | implicit_and | parsed_parenthesis)

    explicit_or = Group(
        parsed_and + Suppress(Keyword("or")) + parsed_or
    ).setResultsName("or")
    parsed_or << (explicit_or | parsed_and)

    return parsed_or.parseString
def __init__(self):
    """Constructor: build the LAD parser.

    A program (``self.Program``) is one or more networks.  Each network
    is an ``NW:<digit>`` header line, a block of LD/AND/OR commands
    (possibly nested through BSOR/BFOR and BSAND/BFAND pairs) and a final
    ``OUT`` command.  Every command ends at the end of its line.
    """
    def eol():
        """Return a fresh suppressed end-of-line element."""
        return pp.Suppress(pp.lineEnd())

    def operand(prefix):
        """Return ``prefix`` (case-insensitive) followed by digits."""
        return pp.Combine(
            pp.CaselessLiteral(prefix) + pp.Word(pp.nums, max=2))

    def either(negated, plain):
        """Return the negated-or-plain keyword pair, negated tried first."""
        return pp.CaselessKeyword(negated) | pp.CaselessKeyword(plain)

    # network header
    self.NwNumber = (
        pp.Word(pp.nums, max=1)
        .setParseAction(pp.tokenMap(int))
        .setBreak(False)
    )
    self.Nw = pp.CaselessLiteral('NW:') + self.NwNumber + eol()

    # operands
    self.Ope_I = operand('I')
    self.Ope_O = operand('O')
    self.Ope_M = operand('M')
    self.Ope = self.Ope_I | self.Ope_O | self.Ope_M

    # single commands
    self.Command_LD = either('LDN', 'LD') + self.Ope + eol()
    self.Command_AND = either('ANDN', 'AND') + self.Ope + eol()
    self.Command_OR = either('ORN', 'OR') + self.Ope + eol()
    self.Command_OUT = pp.CaselessKeyword('OUT') + self.Ope + eol()
    self.Command_BSAND = pp.CaselessKeyword('BSAND') + eol()
    self.Command_BFAND = pp.CaselessKeyword('BFAND') + eol()
    self.Command_BSOR = pp.CaselessKeyword('BSOR') + eol()
    self.Command_BFOR = pp.CaselessKeyword('BFOR') + eol()

    # command sequences (each repetition is limited to 0..7)
    self.Command_LDOR = self.Command_LD + self.Command_OR * (0, 7)
    self.Command_ANDOR = self.Command_AND + self.Command_OR * (0, 7)
    self.Command_LDAND = self.Command_LDOR + self.Command_ANDOR * (0, 7)

    # nested blocks
    self.Complex = pp.Forward()
    self.Block = pp.Group(
        (self.Complex | self.Command_LDAND)
        + pp.Optional(self.Command_ANDOR * (0, 7))
    )
    self.ComplexOR = (
        self.Command_BSOR + self.Block + self.Block + self.Command_BFOR)
    self.ComplexAND = (
        self.Command_BSAND + self.Block + self.Block + self.Command_BFAND)
    self.Complex <<= self.ComplexOR | self.ComplexAND

    # whole program
    self.NwProgram = pp.Group(self.Nw + self.Block + self.Command_OUT)
    self.Program = pp.OneOrMore(self.NwProgram)
def parse_file(file_name):
    """Parse a file of namespace/enum/class/struct declarations.

    Top-level content is one or more of: ``enum class`` definitions,
    ``class``/``struct`` definitions (optionally templated, ``final``,
    ``stub`` and carrying a ``[[...]]`` attribute) and ``namespace``
    blocks.  ``//`` and C-style comments are ignored.

    Args:
        file_name: Passed to pyparsing's ``parseFile``.

    Returns:
        The results of parsing the entire file (``parseAll=True``).
    """
    # --- lexical elements ---------------------------------------------------
    first = pp.Word(pp.alphas + "_", exact=1)
    rest = pp.Word(pp.alphanums + "_")
    number = pp.Word(pp.nums)
    identifier = pp.Combine(first + pp.Optional(rest))

    lbrace = pp.Literal('{').suppress()
    rbrace = pp.Literal('}').suppress()
    class_kw = pp.Literal('class')
    colon = pp.Literal(":")
    semi = pp.Literal(";").suppress()
    langle = pp.Literal("<")
    rangle = pp.Literal(">")
    equals = pp.Literal("=")
    comma = pp.Literal(",")
    lparen = pp.Literal("(")
    rparen = pp.Literal(")")
    lbrack = pp.Literal("[")
    rbrack = pp.Literal("]")
    mins = pp.Literal("-")
    struct = pp.Literal('struct')
    template = pp.Literal('template')
    final = pp.Literal('final').setResultsName("final")
    stub = pp.Literal('stub').setResultsName("stub")

    # --- types (possibly templated, e.g. a<b, c<d>>) ------------------------
    with_colon = pp.Word(pp.alphanums + "_" + ":")
    btype = with_colon
    # Named type_expr rather than ``type`` to avoid shadowing the builtin.
    type_expr = pp.Forward()
    tmpl = pp.Group(
        btype.setResultsName("template_name")
        + langle.suppress()
        + pp.Group(pp.delimitedList(type_expr))
        + rangle.suppress()
    )
    type_expr << (tmpl | btype)

    # --- enums ----------------------------------------------------------------
    enum_lit = pp.Literal('enum')
    enum_class = pp.Group(enum_lit + class_kw)
    ns = pp.Literal("namespace")
    enum_init = equals.suppress() + pp.Optional(mins) + number
    enum_value = pp.Group(identifier + pp.Optional(enum_init))
    enum_values = pp.Group(
        lbrace + pp.delimitedList(enum_value) + pp.Optional(comma) + rbrace
    )

    content = pp.Forward()

    # --- members and attributes ---------------------------------------------
    member_name = pp.Combine(
        pp.Group(identifier + pp.Optional(lparen + rparen))
    )
    attrib = pp.Group(
        lbrack.suppress()
        + lbrack.suppress()
        + pp.SkipTo(']')
        + rbrack.suppress()
        + rbrack.suppress()
    )
    opt_attribute = pp.Optional(attrib).setResultsName("attribute")

    namespace = pp.Group(
        ns.setResultsName("type")
        + identifier.setResultsName("name")
        + lbrace
        + pp.Group(pp.OneOrMore(content)).setResultsName("content")
        + rbrace
    )
    enum = pp.Group(
        enum_class.setResultsName("type")
        + identifier.setResultsName("name")
        + colon.suppress()
        + identifier.setResultsName("underline_type")
        + enum_values.setResultsName("enum_values")
        + pp.Optional(semi).suppress()
    )

    default_value = equals.suppress() + pp.SkipTo(';')
    class_member = pp.Group(
        type_expr.setResultsName("type")
        + member_name.setResultsName("name")
        + opt_attribute
        + pp.Optional(default_value).setResultsName("default")
        + semi.suppress()
    ).setResultsName("member")

    # --- classes / structs ----------------------------------------------------
    template_param = pp.Group(
        identifier.setResultsName("type") + identifier.setResultsName("name")
    )
    template_def = pp.Group(
        template
        + langle
        + pp.Group(pp.delimitedList(template_param)).setResultsName("params")
        + rangle
    )
    class_content = pp.Forward()
    class_def = pp.Group(
        pp.Optional(template_def).setResultsName("template")
        + (class_kw | struct).setResultsName("type")
        + with_colon.setResultsName("name")
        + pp.Optional(final)
        + pp.Optional(stub)
        + opt_attribute
        + lbrace
        + pp.Group(pp.ZeroOrMore(class_content)).setResultsName("members")
        + rbrace
        + pp.Optional(semi)
    )

    content << (enum | class_def | namespace)
    class_content << (enum | class_def | class_member)

    # --- whole file -------------------------------------------------------------
    rt = pp.OneOrMore(content)
    single_line_comment = "//" + pp.restOfLine
    rt.ignore(single_line_comment)
    rt.ignore(pp.cStyleComment)
    return rt.parseFile(file_name, parseAll=True)
def pp_path():
    """Return a parser element named ``path``.

    It matches either a relative path (``/``-separated words, optionally
    ending in slashes) or an absolute path (``/`` optionally followed by
    a relative path); the longer match wins.
    """
    segments = pp.delimitedList(
        InstructionBase.pp_word(without="/"), delim="/", combine=True
    )
    trailing_slashes = pp.Optional(pp.Word("/"))
    relative = pp.Combine(segments + trailing_slashes)
    absolute = pp.Combine("/" + pp.Optional(relative))
    path = relative ^ absolute
    return path.setName("path")
def __init__(self):
    """Build the selection-expression grammar (``self.expression``).

    Supported selections:

    * name selectors (``chain``, ``resn``, ``altloc``, ``name``,
      ``icode``, ``resseq``) followed by ``+``-joined identifiers;
    * ``resi`` followed by ``+``-joined numbers or ``a-b`` ranges;
    * numeric selectors (``b``, ``q``) followed by a comparison operator
      and a number.

    Selections can be negated with ``not``, parenthesised, and chained
    with ``and`` / ``or``.  Matches are reported through ``self._push``
    and ``self._push_first``.  ``self.structure`` starts as ``None``.
    """
    self.structure = None

    # Name selectors
    name_selector = (
        pp.Keyword('chain')
        | pp.Keyword('resn')
        | pp.Keyword('altloc')
        | pp.Keyword('name')
        | pp.Keyword('icode')
        | pp.Keyword('resseq')
    )
    # Resi is special cause it can take a range
    resi_keyword = pp.Keyword('resi')
    # Number selectors
    numeric_selector = pp.Keyword('b') | pp.Keyword('q')

    # operators
    and_or = pp.Keyword('or') | pp.Keyword('and')
    not_keyword = pp.Keyword('not')
    plus = pp.Literal('+').suppress()
    minus = pp.Literal('-')
    sign = plus | minus
    lpar = pp.Literal('(').suppress()
    rpar = pp.Literal(')').suppress()
    point = pp.Literal('.')
    comparison = (
        pp.Literal('>=')
        | pp.Literal('!=')
        | pp.Literal('<=')
        | pp.Literal('>')
        | pp.Literal('<')
        | pp.Literal('==')
    )

    # operands
    empty = pp.Literal('""') | pp.Literal("''")
    identifier = pp.Word(pp.alphanums + "'" + '.') | empty
    number = pp.Word(pp.nums)
    integer = pp.Combine(pp.Optional(sign) + number)
    float_number = pp.Combine(
        integer + pp.Optional(point + pp.Optional(number))
    ).setParseAction(self._push_first)
    residue_range = integer + pp.Optional(minus + integer)
    identifier_list = identifier + pp.ZeroOrMore(plus + identifier)

    # selections
    identifier_selection = (
        name_selector + identifier_list.setParseAction(self._push)
    )
    resi_selection = resi_keyword + (
        residue_range + pp.ZeroOrMore(plus + residue_range)
    ).setParseAction(self._push)
    numeric_selection = numeric_selector + (
        comparison + float_number
    ).setParseAction(self._push_first)

    expression = pp.Forward()
    selection = identifier_selection | resi_selection | numeric_selection
    atom = selection.setParseAction(self._push_first) | (
        lpar + expression + rpar
    )
    factor = (not_keyword + atom).setParseAction(self._push_first) | atom
    # PyMol doesnt specify precedence of "or" and "and"
    expression << factor + pp.ZeroOrMore(
        (and_or + factor).setParseAction(self._push_first)
    )
    self.expression = expression
def quoted_space_sep_params_token():
    r"""Return a token parser for space-separated command parameters.

    Only the parameter portion of a command line is handled here.
    Parameters are separated by spaces and may use any printable
    character.  A parameter that contains embedded spaces must be wrapped
    in quotation marks; single or double quotes both work.  To embed
    quotation marks inside a parameter, wrap it in quotes of the opposite
    style.

    The parsed parameters are accessible through the results name
    ``params``.

    TODO: Add support for escaping quote characters when embedded in
          strings delimited with the same quote style, as in
          "\"hello\" world"

    :rtype: pyparsing parser element
    """
    def quoted_parameter(quote_char):
        """Parameter delimited by ``quote_char``; may contain spaces and
        the other quote character.  The closing quote is optional."""
        return pp.Combine(
            pp.Word(quote_char)
            + pp.Optional(
                pp.Word(pp.printables + ' ', excludeChars=quote_char))
            + pp.Optional(pp.Word(quote_char))
        )

    # A parameter with no spaces and no quote characters in it.
    bare_parameter = pp.Word(pp.printables, excludeChars='"\'')

    # Any of the three forms; ``^`` picks the longest match.
    parameter = (
        bare_parameter ^ quoted_parameter("'") ^ quoted_parameter('"')
    )

    # One or more parameters, collected under the 'params' results name.
    return pp.OneOrMore(parameter).setResultsName('params')
def parse_buffer(cls, sensor_uuid, buf):
    """Build an instance from the first Snort alert found in ``buf``.

    The alert is searched for with a pyparsing grammar.  On a match the
    tokens are passed positionally to ``cls`` after ``sensor_uuid``, in
    this order: rule id, signature, classification (``''`` if absent),
    priority, date, source IP, source port (``''`` if absent),
    destination IP, destination port (``''`` if absent), protocol.

    Args:
        sensor_uuid: Forwarded unchanged as the first constructor argument.
        buf: Text to search for an alert.

    Returns:
        ``cls(sensor_uuid, *tokens)`` for the first match, or ``None``
        when nothing matches.
    """
    # Defining generic pyparsing objects.
    integer = pyp.Word(pyp.nums)
    ip_addr = pyp.Combine(
        integer + '.' + integer + '.' + integer + '.' + integer)
    port = pyp.Suppress(':') + integer

    # Defining pyparsing objects from expected format:
    #
    #    [**] [1:160:2] COMMUNITY SIP TCP/IP message flooding directed to SIP proxy [**]
    #    [Classification: Attempted Denial of Service] [Priority: 2]
    #    01/10-00:08:23.598520 201.233.20.33:63035 -> 192.234.122.1:22
    #    TCP TTL:53 TOS:0x10 ID:2145 IpLen:20 DgmLen:100 DF
    #    ***AP*** Seq: 0xD34C30CE  Ack: 0x6B1F7D18  Win: 0x2000  TcpLen: 32
    #
    # Note: This format is known to change over versions.
    # Works with Snort version 2.9.2 IPv6 GRE (Build 78)
    header = (
        pyp.Suppress("[**] [")
        + pyp.Combine(integer + ":" + integer + ":" + integer)
        + pyp.Suppress("]")
    )
    signature = (
        pyp.Combine(pyp.SkipTo("[**]", include=False))
        + pyp.Suppress("[**]")
    )
    classif = (
        pyp.Suppress(pyp.Literal("[Classification:"))
        + pyp.Regex("[^]]*")
        + pyp.Suppress(']')
    )
    pri = pyp.Suppress("[Priority:") + integer + pyp.Suppress("]")
    date = pyp.Combine(
        # day/month/year (year is optional, depends on snort being
        # started with -y)
        integer + "/" + integer
        + pyp.Optional(
            pyp.Combine("/" + integer),
            default="/" + str(datetime.now().year)[2:4])
        + '-' + integer + ':' + integer + ':' + integer + '.' + integer
    )
    src_ip = ip_addr
    src_port = port
    arrow = pyp.Suppress("->")
    dest_ip = ip_addr
    dest_port = port
    # Raw string: "\S" in a plain string literal is an invalid escape.
    proto = pyp.Regex(r"\S+")

    bnf = (
        header + signature + pyp.Optional(classif, default='') + pri + date
        + src_ip + pyp.Optional(src_port, default='') + arrow + dest_ip
        + pyp.Optional(dest_port, default='') + proto
    )

    fields = bnf.searchString(buf)
    if fields:
        if abs(datetime.utcnow() - datetime.now()).total_seconds() > 1:
            # Since snort doesn't log in UTC, a correction is needed to
            # convert the logged time to UTC. The following code calculates
            # the delta between local time and UTC and uses it to convert
            # the logged time to UTC. Additional time formatting makes
            # sure the previous code doesn't break.
            # NOTE(review): no time conversion is visible here; the only
            # effect of this branch is stripping whitespace from the
            # tokens, and only when local time differs from UTC -- confirm
            # whether the strip should be unconditional.
            fields[0] = [f.strip() for f in fields[0]]
        return cls(sensor_uuid, *fields[0])
    else:
        return None
def getMacroInitializerBNF():
    """Return the grammar for ``NAME[(params)] = { initializer };``.

    The initializer is a brace-enclosed, comma-separated list of entries
    (a trailing comma is allowed and the list may be empty).  Each entry
    is an optional ``.member =`` designator followed by a literal, a
    variable or a nested initializer.  C++-style comments are ignored.
    """
    # initializer := { [member = ] (variable | expression | { initializer } ) }
    initializer = pp.Forward()

    # --- punctuation (all suppressed) ----------------------------------------
    (lbracket, rbracket, lbrace, rbrace,
     comma, equals, dot, semicolon) = (
        pp.Literal(symbol).suppress() for symbol in "(){},=.;"
    )

    # --- literals ----------------------------------------------------------------
    ident = pp.Word(pp.alphas + "_", pp.alphas + pp.nums + "_")
    int_num = pp.Word(pp.nums)
    hex_num = pp.Literal("0x") + pp.Word(pp.hexnums)
    octal_num = pp.Literal("0") + pp.Word("01234567")
    int_suffix = pp.Optional(
        pp.Literal("ULL") | pp.Literal("LL") | pp.Literal("L"))
    integer = (hex_num | octal_num | int_num) + int_suffix
    float_num = (
        pp.Regex(r'\d+(\.\d*)?([eE]\d+)?') + pp.Optional(pp.Literal("f"))
    )
    char = (
        pp.Literal("'") + pp.Word(pp.printables, exact=1) + pp.Literal("'")
    )
    array_index = integer | ident

    # --- casts, strings, expressions -------------------------------------------
    type_spec = (
        pp.Optional("unsigned")
        + pp.oneOf("int long short float double char u8 i8 void")
        + pp.Optional(pp.Word("*"), default="")
    )
    type_cast = pp.Combine("(" + (type_spec | ident) + ")").suppress()
    string = pp.Combine(
        pp.OneOrMore(
            pp.QuotedString(quoteChar='"', escChar='\\', multiline=True)),
        adjacent=False,
    )
    literal = pp.Optional(type_cast) + (integer | float_num | char | string)
    var = pp.Combine(
        pp.Optional(type_cast)
        + ident
        + pp.Optional("[" + array_index + "]")
    )
    expr = literal | var  # TODO

    # --- initializer entries -----------------------------------------------------
    member = pp.Combine(
        dot + ident + pp.Optional("[" + array_index + "]"),
        adjacent=False,
    )
    value = expr | initializer
    entry = pp.Group(pp.Optional(member + equals, default="") + value)
    entries = (
        pp.ZeroOrMore(entry + comma) + entry + pp.Optional(comma)
    ) | pp.ZeroOrMore(entry + comma)
    initializer <<= lbrace + entries + rbrace

    # --- macro invocation ----------------------------------------------------------
    params = pp.Group(pp.ZeroOrMore(expr + comma) + expr)
    macro_params = lbracket + params + rbracket
    macro_initializer = (
        ident
        + pp.Optional(macro_params)
        + equals
        + pp.Group(initializer)
        + semicolon
    )
    macro_initializer.ignore(pp.cppStyleComment)
    return macro_initializer
def _typeof_expression():
    """Return a parser for ``typeof(...)`` / ``__typeof__(...)``.

    The keyword, the parentheses and whatever ``_anything_beetween('()')``
    matches between them are combined into a single token.
    """
    typeof_keyword = (
        pyparsing.Keyword('typeof') | pyparsing.Keyword('__typeof__')
    )
    operand = pyparsing.Combine(_anything_beetween('()'))
    return pyparsing.Combine(
        typeof_keyword
        + pyparsing.Literal('(')
        + operand
        + pyparsing.Literal(')')
    )
def parse_math_expr(estr):
    """Tokenize an algebraic expression.

    The grammar recognises numbers, the constants ``pi``/``PI`` and ``e``,
    identifiers (a letter followed by letters, digits or underscores),
    function calls with one argument expression or with empty
    parentheses, parenthesised sub-expressions and the operators
    ``+ - * / ^``.

    Parameters
    ----------
    estr : str
        String in BNGL file corresponding to an algebraic expression.
        Surrounding whitespace is stripped before parsing.

    Returns
    -------
    list
        Algebraic tokens in order of appearance, including functions,
        variables, numbers, and operators.
    """
    # --- terminals -----------------------------------------------------------
    dot = pp.Literal(".")
    euler = pp.CaselessLiteral("E")
    fnumber = pp.Combine(
        pp.Word("+-" + pp.nums, pp.nums)
        + pp.Optional(dot + pp.Optional(pp.Word(pp.nums)))
        + pp.Optional(euler + pp.Word("+-" + pp.nums, pp.nums))
    )
    ident = pp.Word(pp.alphas, pp.alphas + pp.nums + "_")

    lpar = pp.Literal("(")
    rpar = pp.Literal(")")
    addop = pp.Literal("+") | pp.Literal("-")
    multop = pp.Literal("*") | pp.Literal("/")
    expop = pp.Literal("^")
    pi = pp.Literal("pi") | pp.Literal("PI")

    # --- non-terminals -------------------------------------------------------
    expr = pp.Forward()
    factor = pp.Forward()

    empty_call = pp.Combine(ident + lpar + rpar)
    call_with_argument = ident + lpar + expr + rpar
    operand = (
        pi ^ euler ^ fnumber ^ call_with_argument ^ empty_call ^ ident
    )
    atom = (pp.Optional("-") + operand) ^ (lpar + expr + rpar)

    factor <<= atom + pp.ZeroOrMore(expop + factor)
    term = factor + pp.ZeroOrMore(multop + factor)
    expr <<= term + pp.ZeroOrMore(addop + term)

    return expr.parseString(estr.strip())
def primative(self):
    """Return a parser for a primitive literal.

    Alternatives are tried in this order: float (``[-]digits.digits``),
    integer (``[-]digits``), quoted string, boolean (``True``/``true``/
    ``TRUE`` or ``False``/``false``/``FALSE``).
    """
    digits = pp.Word(pp.nums)
    dot = pp.Literal('.')

    integer_literal = pp.Combine(pp.Optional('-') + digits)
    float_literal = pp.Combine(
        pp.Optional('-') + digits + dot + pp.OneOrMore(digits)
    )
    string_literal = pp.quotedString

    true_literal = (
        pp.Literal("True") | pp.Literal("true") | pp.Literal("TRUE")
    )
    false_literal = (
        pp.Literal("False") | pp.Literal("false") | pp.Literal("FALSE")
    )
    boolean_literal = true_literal | false_literal

    # Float must be tried before integer, otherwise "1.5" stops at "1".
    return (
        float_literal | integer_literal | string_literal | boolean_literal
    )
def __full_log_parse(self, logfile):
    """Extract alert entries from a Snort alert file (full logging only).

    The file is split into blank-line-separated blocks and each block is
    searched with a pyparsing grammar (rule id, priority, timestamp,
    source IP, destination IP).  The grammar may need adjustment
    depending on the logging style and the fields present.

    :param logfile: Snort alert log file - from full logging configuration.
    :return ret: Dictionary keyed by a running index; every value holds
        ``'file'`` (the log path), ``'text'`` (the raw block) and
        ``'matches'`` (the search results).  If reading fails, a warning
        is logged and whatever was collected so far is returned.
    """
    entries = {}

    integer = pyp.Word(pyp.nums)
    ip_addr = (
        pyp.Combine(integer + '.' + integer + '.' + integer + '.' + integer)
        + pyp.Suppress(pyp.Optional(":" + integer))
    )
    header = (
        pyp.Suppress("[**] [")
        + pyp.Combine(integer + ":" + integer + ":" + integer)
        + pyp.Suppress(pyp.SkipTo("[**]", include=True))
    )
    # Only referenced by the disabled grammar variant below.
    cls = (
        pyp.Suppress(pyp.Optional(pyp.Literal("[Classification:")))
        + pyp.Regex("[^]]*")
        + pyp.Suppress(']')
    )
    pri = pyp.Suppress("[Priority:") + integer + pyp.Suppress("]")
    date = pyp.Combine(
        integer + "/" + integer + '-'
        + integer + ':' + integer + ':' + integer + '.' + integer
    )
    src_ip = ip_addr + pyp.Suppress("->")
    dest_ip = ip_addr

    # TODO: In example, classification doesn't exist, but if inconsistent output won't parse right
    # TODO: Suppressed port information from extraction for now (not in ICMP entries)
    # TODO: might want to cascade matching -> on header, then match on IPs, then match on ports
    # bnf = header+cls+pri+date+src_ip+dest_ip
    bnf = header + pri + date + src_ip + dest_ip

    try:
        with open(logfile) as snort_logfile:
            blocks = itertools.groupby(
                snort_logfile, key=lambda line: bool(line.strip()))
            for has_content, lines in blocks:
                if not has_content:
                    continue
                block_text = ''.join(lines)
                matches = bnf.searchString(block_text)
                if not matches:
                    continue
                # Keys are consecutive integers starting at 0.
                entries[len(entries)] = {
                    'file': logfile,
                    'text': block_text,
                    'matches': matches,
                }
    except Exception as e:
        self._logger.warning(
            "Unable to read file {0}. Exceptions={1}".format(logfile, e))
    return entries
def craft_parse(text):
    """Parse ``text`` and return the walked form of its first element.

    Grammar summary:

    * comments: ``:: ...`` to end of line, or ``:> ... <:`` blocks
      (both suppressed);
    * values: quoted strings or identifiers (identifiers go through
      ``_type_cast_value``);
    * lists: ``[ ... ]`` holding comments, functions, lists and values;
    * functions: ``name : [ ... ]``;
    * program: one or more comments or functions.

    Every grammar element reports to a ``SourceValidator``.

    Returns:
        ``__walk`` applied to the first element of the parse result.

    Raises:
        Whatever ``validator.panic()`` returns, if parsing or walking
        raises any exception.
    """
    LineComment = pyp.Combine(pyp.Literal('::') + pyp.restOfLine).suppress()
    BlockComment = pyp.Combine(
        pyp.Literal(':>') + pyp.SkipTo(pyp.Literal('<:')) + pyp.Literal('<:')
    ).suppress()
    Comment = BlockComment | LineComment

    Identifier = pyp.Word(pyp.alphanums + '!#$%&()*+,./;<=>?@\\^-_`{|}~')
    # addParseAction works in place, so every use of Identifier (including
    # function names) is type cast.
    Value = (Comment
             | pyp.QuotedString('"')
             | pyp.QuotedString("'")
             | Identifier.addParseAction(_type_cast_value))
    LBRACKET, RBRACKET = map(pyp.Suppress, '[]')

    Function = pyp.Forward()
    List = pyp.Forward()
    Function << pyp.Dict(
        pyp.Group(
            Identifier
            + pyp.Literal(':')
            + pyp.Group(
                LBRACKET
                + pyp.ZeroOrMore(Comment | Function | List | Value)
                + RBRACKET)))
    List << pyp.Group(
        LBRACKET
        + pyp.ZeroOrMore(Comment | Function | List | Value)
        + RBRACKET)

    Program = pyp.OneOrMore(Comment | Function)

    # Validate for syntax error messages:
    validator = SourceValidator()
    Value.setParseAction(validator.validate)
    List.setParseAction(validator.validate)
    Identifier.addParseAction(validator.validate)
    #Comment.setParseAction(validator.validate)
    Function.setParseAction(validator.validate)
    Program.setParseAction(validator.validate)

    try:
        return __walk(Program.parseString(text)[0])
    except Exception:
        syntax_error = validator.panic()

    # Now raise the exception with a clean stack trace
    raise syntax_error
def __init__(self):
    """Build the file grammar and store it as ``self._parser``.

    A file is zero or more elements.  Each element is either a command
    invocation (``identifier ( arguments )``) followed by a line ending,
    or a bare line ending (optional spaces, bracket comments and a line
    comment).  Arguments may be bracket arguments (``[=*[ ... ]=*]``),
    quoted arguments, unquoted arguments or nested parenthesised groups.
    A command invocation is reported as a ``(name, argument_list)`` pair.
    """
    newline = "\n"
    space_plus = pp.Regex("[ \t]+")
    space_star = pp.Optional(space_plus)

    # One character, or one backslash escape, inside a quoted argument.
    quoted_element = pp.Regex(r'[^\\"]|\\[^A-Za-z0-9]|\\[trn]')
    quoted_argument = pp.Combine('"' + pp.ZeroOrMore(quoted_element) + '"')

    # The closing marker of a bracket argument depends on how many "="
    # signs the opening marker has, so the content parser is a Forward
    # that is (re)defined each time an opening bracket is matched.
    bracket_content = pp.Forward()

    def action_bracket_open(tokens: pp.ParseResults):
        nonlocal bracket_content
        # "[==[" closes with "]==]": same number of "=" between brackets.
        marker = "]" + "=" * (len(tokens[0]) - 2) + "]"
        bracket_content <<= pp.SkipTo(marker, include=True)

    bracket_open = pp.Regex(r"\[=*\[").setParseAction(action_bracket_open)
    bracket_argument = pp.Combine(bracket_open + bracket_content)

    # One character, or one backslash escape, of an unquoted argument.
    unquoted_element = pp.Regex(r'[^\s()#"\\]|\\[^A-Za-z0-9]|\\[trn]')
    unquoted_argument = pp.Combine(pp.OneOrMore(unquoted_element))

    # Order matters: bracket and quoted forms are tried before unquoted.
    argument = bracket_argument | quoted_argument | unquoted_argument

    # "#" not followed by a bracket opener starts a comment to end of line.
    line_comment = pp.Combine("#" + ~bracket_open + pp.SkipTo(pp.LineEnd()))
    bracket_comment = pp.Combine("#" + bracket_argument)
    line_ending = (
        space_star
        + pp.ZeroOrMore(bracket_comment + space_star)
        + pp.Optional(line_comment)
        + (newline | pp.lineEnd)
    )

    identifier = pp.Word(pp.alphas + "_", pp.alphanums + "_")

    # Arguments are whitespace-sensitive and may nest in parentheses.
    arguments = pp.Forward()
    arguments << pp.ZeroOrMore(
        argument | line_ending | space_plus | "(" + arguments + ")"
    ).leaveWhitespace()
    arguments = pp.Group(arguments)

    PAREN_L, PAREN_R = map(pp.Suppress, "()")
    # Parse action turns the match into (command name, list of arguments).
    command_invocation = (
        identifier + space_star.suppress() + PAREN_L + arguments + PAREN_R
    ).setParseAction(lambda t: (t[0], t[1].asList()))

    file_element = (
        space_star + command_invocation + line_ending | line_ending
    ).leaveWhitespace()
    file = pp.ZeroOrMore(file_element)
    self._parser = file
def preprocess(self, raw_inp):
    """
    Parse ``raw_inp`` as an expression and return the pyparsing result.

    Accepted forms:

    * terms: variables ``v<digits>`` and integer constants with an
      optional leading ``-``;
    * binary expressions, always parenthesised: ``( expr op expr )`` with
      ``op`` one of ``+ - * / % < <= == && ||``;
    * unary expressions: ``! expr``;
    * ``min(expr, expr)``, ``max(expr, expr)`` and
      ``select(expr, expr, expr)``.

    Compound expressions are wrapped in ``Group``, so they appear as
    nested lists in the result.
    """
    lit = pyp.Literal
    grouped = pyp.Group

    variable = pyp.Combine('v' + pyp.Word(pyp.nums))
    constant = pyp.Combine(pyp.Optional(lit('-')) + pyp.Word(pyp.nums))
    term = variable | constant

    binary_op = pyp.oneOf('+ - * / % < <= == && ||')
    unary_op = pyp.oneOf('!')

    # Expressions are recursive, so declare first and define at the end.
    expr = pyp.Forward()

    binary_expr = grouped(lit('(') + expr + binary_op + expr + lit(')'))
    unary_expr = grouped(unary_op + expr)

    two_arg_call = grouped(
        pyp.oneOf('min max') + lit('(') + expr + lit(',') + expr + lit(')')
    )
    three_arg_call = grouped(
        lit('select') + lit('(') + expr + lit(',') + expr + lit(',')
        + expr + lit(')')
    )
    call_expr = two_arg_call | three_arg_call

    expr << (binary_expr | unary_expr | call_expr | term)

    return expr.parseString(raw_inp)
def BNF(self):
    """
    Return the grammar for a macro invocation assigned a brace initializer.

    Example of the input this is written for::

        VLIB_CLI_COMMAND (show_sr_tunnel_command, static) = {
            .path = "show sr tunnel",
            .short_help = "show sr tunnel [name <sr-tunnel-name>]",
            .function = show_sr_tunnel_fn,
        };

    The macro-invocation part comes from the parent class's ``BNF()``;
    this method appends ``= { entries } ;``. Each entry is an optional
    ``.member =`` designator (empty string when absent) followed by a
    value, and a value is either an expression or a nested brace block.
    C++-style comments are ignored.

    The atoms ``dot``, ``varName``, ``arrayIndex``, ``expr``, ``equals``,
    ``comma``, ``lbrace``, ``rbrace`` and ``semicolon`` are taken from the
    enclosing scope.

    :returns: the combined pyparsing expression.
    """
    # Brace blocks can nest, hence the forward declaration.
    cs = pp.Forward()
    member = pp.Combine(dot + varName + pp.Optional("[" + arrayIndex + "]"),
                        adjacent=False)
    value = expr | cs
    entry = pp.Group(pp.Optional(member + equals, default="") + value)
    # First alternative: list without a trailing comma; second alternative:
    # every entry (including the last) is followed by a comma, or the list
    # is empty.
    entries = (pp.ZeroOrMore(entry + comma) + entry + pp.Optional(comma)) | (pp.ZeroOrMore(entry + comma))
    cs << (lbrace + entries + rbrace)

    function_macro = super(MacroInitializer, self).BNF()
    mi = function_macro + equals + pp.Group(cs) + semicolon
    mi.ignore(pp.cppStyleComment)
    return mi
class TestCombine(PyparsingExpressionTestCase):
    """Test specs contrasting a plain sequence with ``pp.Combine``.

    Both specs parse the same text of real numbers; the first yields the
    integer part, dot and fraction as separate tokens, the second wraps the
    expression in ``pp.Combine`` so each number comes back as one token.
    """

    tests = [
        PpTestSpec(
            desc="Parsing real numbers - fail, parsed numbers are in pieces",
            text="1.2 2.3 3.1416 98.6",
            expr=(pp.Word(pp.nums) + "." + pp.Word(pp.nums))[...],
            expected_list="1 . 2 2 . 3 3 . 1416 98 . 6".split(),
        ),
        PpTestSpec(
            desc="Parsing real numbers - better, use Combine to combine multiple tokens into one",
            text="1.2 2.3 3.1416 98.6",
            expr=pp.Combine(pp.Word(pp.nums) + "." + pp.Word(pp.nums))[...],
            expected_list="1.2 2.3 3.1416 98.6".split(),
        ),
    ]
def _parse_expr(text, ldelim='(', rdelim=')'):
    """
    Parse a mathematical expression with pyparsing's ``infixNotation``.

    Operands are identifiers (letters, digits, underscore; not starting
    with a digit) or numbers with optional fraction and exponent parts.
    Operators, from tightest to loosest binding: ``**`` (right
    associative), unary ``+ - ~``, ``* / // %``, ``+ -``, ``<< >>``, ``&``,
    ``^``, ``|``.

    :param text: the expression to parse.
    :param ldelim: opening grouping delimiter (suppressed from the output).
    :param rdelim: closing grouping delimiter (suppressed from the output).
    :returns: element ``0`` of the parse result. ``parseAll`` is not
        requested, so parsing is not anchored at the end of ``text``.
    """
    pp = pyparsing
    left, right = pp.opAssoc.LEFT, pp.opAssoc.RIGHT

    identifier = pp.Word(pp.alphas + '_', pp.alphanums + '_')

    decimal_point = pp.Literal('.')
    exponent_mark = pp.CaselessLiteral('E')
    # NOTE(review): the leading character may be a sign or a digit and the
    # following digits are optional, so a lone '+'/'-' also matches here.
    mantissa = pp.Word('+-' + pp.nums, pp.nums)
    fraction = pp.Optional(decimal_point + pp.Optional(pp.Word(pp.nums)))
    exponent = pp.Optional(exponent_mark + pp.Word('+-' + pp.nums, pp.nums))
    number = pp.Combine(mantissa + fraction + exponent)

    operand = identifier | number

    # Listed from highest to lowest precedence.
    precedence = [
        (pp.Literal('**'), 2, right),
        (pp.oneOf('+ - ~'), 1, right),
        (pp.oneOf('* / // %'), 2, left),
        (pp.oneOf('+ -'), 2, left),
        (pp.oneOf('<< >>'), 2, left),
        (pp.Literal('&'), 2, left),
        (pp.Literal('^'), 2, left),
        (pp.Literal('|'), 2, left),
    ]

    grammar = pp.infixNotation(
        operand,
        precedence,
        lpar=pp.Suppress(ldelim),
        rpar=pp.Suppress(rdelim),
    )
    parsed = grammar.parseString(text)
    return parsed[0]
def from_str(s, clazz, sympy_coefficients=False):
    """
    Parse a signed linear combination of elements from the string ``s``.

    The input is a sequence of terms. The first term has an optional
    leading minus; every later term starts with ``+`` or a minus. Each term
    then has an optional coefficient in square brackets (default ``1``)
    and an optional element matched by ``clazz.parser()`` (default ``""``).
    The whole string must match.

    :param s: the text to parse.
    :param clazz: object providing ``parser()`` (a pyparsing expression for
        one element) and ``from_str(text)`` (builds the element's value).
    :param sympy_coefficients: when true, the bracket contents are handed
        to sympy's ``parse_expr``; otherwise they must be an unsigned
        integer or a decimal number.
    :returns: the terms ``(+/-coefficient) * clazz.from_str(element)``
        folded together with ``operator.add``.
    """
    basis_element = clazz.parser()

    if not sympy_coefficients:
        int_coeff = pp.Suppress("[") + pp.Word(pp.nums) + pp.Suppress("]")
        int_coeff.setParseAction(lambda t: [int(t[0])])

        decimal_text = pp.Combine(
            pp.Optional(pp.Word(pp.nums)) + "." + pp.Optional(pp.Word(pp.nums))
        )
        float_coeff = pp.Suppress("[") + decimal_text + pp.Suppress("]")
        float_coeff.setParseAction(lambda t: [float(t[0])])

        coeff = pp.Optional(int_coeff | float_coeff, 1)
    else:
        from sympy.parsing.sympy_parser import parse_expr

        sym_coeff = pp.QuotedString("[", endQuoteChar="]")
        sym_coeff.setParseAction(lambda t: [parse_expr(t[0])])
        coeff = pp.Optional(sym_coeff, 1)

    if not six.PY2:
        # In Python 3, where str is unicode, it is easy to also accept the
        # Unicode minus sign character (U+2212), so a formula can be copied
        # straight from a PDF. Either form is reported as "-".
        minus = pp.Literal("-") | pp.Literal(chr(0x2212))
        minus.setParseAction(lambda t: ["-"])
    else:
        minus = pp.Literal("-")

    leading_term = (
        pp.Optional(minus, "+") + coeff + pp.Optional(basis_element, "")
    )
    following_term = (
        (pp.Literal("+") | minus) + coeff + pp.Optional(basis_element, "")
    )
    grammar = pp.Group(leading_term) + pp.ZeroOrMore(pp.Group(following_term))

    # Second positional argument is parseAll: the entire string must match.
    parsed = grammar.parseString(s, True)

    terms = []
    for sign, magnitude, element_text in parsed:
        signed = magnitude if sign == "+" else -magnitude
        terms.append(signed * clazz.from_str(element_text))

    return functools.reduce(operator.add, terms)