def parse_string(self, string):
    '''Populate this object from the text of a Director resource.

    The grammar is built with pyparsing, imported locally.  Each
    recognised ``keyword = value`` line stores its value through a parse
    action, so running the parser is what fills the object in.

    :param string: resource text; it has to be consumed completely
        (``parseAll=True``)
    :return: the text ``'Director: '`` followed by ``self[NAME]``
    '''
    from pyparsing import (Keyword, Literal, OneOrMore, Word, nestedExpr,
                           nums, printables, quotedString, removeQuotes,
                           replaceWith, restOfLine)

    gr_eq = Literal('=')
    # Work on a copy so the module-level quotedString is left untouched.
    gr_stripped_string = quotedString.copy().setParseAction(removeQuotes)
    gr_opt_quoted_string = gr_stripped_string | restOfLine
    gr_number = Word(nums)
    gr_yn = (Keyword('yes', caseless=True).setParseAction(replaceWith('1'))
             | Keyword('no', caseless=True).setParseAction(replaceWith('0')))

    def _handle_ip(*x):
        # x is (string, location, tokens); tokens are keyword, '=', body.
        a, b, c = x[2]
        return ' %s = { %s }' % (a, c[0])

    def _handle_diraddr(*x):
        a, b, c = x[2]
        self._set(DIRADDRESSES, ' %s' % '\n '.join(c))
        return

    def np(words, fn=gr_opt_quoted_string, action=None):
        '''Build ``(word1 | word2 | ...) = fn`` and attach ``action``.'''
        p = Keyword(words[0], caseless=True)
        for w in words[1:]:
            p = p | Keyword(w, caseless=True)
        p = p + gr_eq + fn
        p.setParseAction(action)
        return p

    gr_name = np((NAME,), action=lambda x: self._set_name(x[2]))
    gr_address = np((ADDRESS,), action=self._parse_setter(ADDRESS))
    gr_fd_conn = np(PList('fd connect timeout'), gr_number,
                    self._parse_setter(FD_CONNECT_TIMEOUT, True))
    gr_heart = np(PList('heartbeat interval'), gr_number,
                  self._parse_setter(HEARTBEATINTERVAL, True))
    gr_max_con = np(PList('maximum console connections'), gr_number,
                    self._parse_setter(MAXIMUMCONSOLECONNECTIONS, True))
    gr_max_jobs = np(PList('maximum concurrent jobs'), gr_number,
                     action=self._parse_setter(MAXIMUMCONCURRENTJOBS, True))
    gr_pass = np((PASSWORD,), action=self._parse_setter(PASSWORD))
    gr_pid = np(PList('pid directory'),
                action=self._parse_setter(PIDDIRECTORY))
    gr_query = np(PList('query file'), action=self._parse_setter(QUERYFILE))
    gr_scripts = np(PList('scripts directory'),
                    action=self._parse_setter(SCRIPTS_DIRECTORY))
    gr_sd_conn = np(PList('sd connect timeout'), gr_number,
                    self._parse_setter(SD_CONNECT_TIMEOUT, True))
    gr_source = np(PList('source address'),
                   action=self._parse_setter(SOURCEADDRESS))
    gr_stats = np(PList('statistics retention'),
                  action=self._parse_setter(STATISTICS_RETENTION))
    gr_verid = np((VERID,), action=self._parse_setter(VERID))
    # The setter itself must be the parse action.  Wrapping it in a lambda
    # only created a setter per match and returned it, so the value was
    # never stored.
    gr_messages = np((MESSAGES,),
                     action=self._parse_setter(MESSAGE_ID, dereference=True))
    gr_work_dir = np(PList('working directory'),
                     action=self._parse_setter(WORKINGDIRECTORY))
    gr_port = np(PList('dir port'), gr_number,
                 self._parse_setter(PORT, True))
    gr_monitor = np((MONITOR,), gr_yn, action=self._parse_setter(MONITOR))

    # This is a complicated one: nested address blocks.
    da_addr = np(('Addr', 'Port'), Word(printables),
                 lambda x, y, z: ' '.join(z))
    da_ip = np(
        ('IPv4', 'IPv6', 'IP'),
        nestedExpr('{', '}',
                   OneOrMore(da_addr).setParseAction(
                       lambda x, y, z: ' ; '.join(z)))
    ).setParseAction(_handle_ip)
    da_addresses = np(PList('dir addresses'),
                      nestedExpr('{', '}', OneOrMore(da_ip)),
                      _handle_diraddr)

    gr_res = OneOrMore(gr_name | gr_address | gr_fd_conn | gr_heart
                       | gr_max_con | gr_max_jobs | gr_pass | gr_pid
                       | gr_query | gr_scripts | gr_sd_conn | gr_source
                       | gr_stats | gr_verid | gr_messages | gr_work_dir
                       | gr_port | gr_monitor | da_addresses)

    # Parsing is done for its side effects; the token list is not needed.
    gr_res.parseString(string, parseAll=True)
    return 'Director: ' + self[NAME]
def compile():
    """Build a JSON grammar and return a function that parses with it.

    :return: a callable taking the JSON text and returning the value
        stored under the ``'top'`` results name
    """
    lbrace, rbrace, lbrack, rbrack, colon = map(pp.Suppress, '{}[]:')
    value = pp.Forward()

    # Keyword literals are replaced by their Python counterparts.
    kw_true = pp.Keyword('true').setParseAction(pp.replaceWith(True))
    kw_false = pp.Keyword('false').setParseAction(pp.replaceWith(False))
    kw_null = pp.Keyword('null').setParseAction(pp.replaceWith(None))

    number = pp.Regex(
        r'-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?')
    number = number.setParseAction(pp.tokenMap(float))

    text = pp.Regex(
        r'"([ !#-\[\]-\U0010ffff]+'
        r'|\\(?:["\\/bfnrt]|u[0-9A-Fa-f]{4}))*"')
    text = text.setParseAction(pp.tokenMap(json_unescape))

    # '-' instead of '+' after the opening bracket/brace: once it has
    # matched, a failure in the rest is reported without backtracking.
    elements = pp.delimitedList(value)
    array = pp.Group(lbrack - pp.Optional(elements) + rbrack)
    array = array.setParseAction(lambda t: t.asList())

    pair = pp.Group(text + colon + value)
    pairs = pp.delimitedList(pair)
    mapping = pp.Dict(lbrace - pp.Optional(pairs) + rbrace)
    mapping = mapping.setParseAction(lambda t: t.asDict())

    value << (mapping | array | text | number
              | kw_true | kw_false | kw_null)

    document = value('top') + pp.StringEnd()
    # NOTE(review): this is invoked on the finished grammar, after every
    # element above has been created -- confirm that is the intent.
    document.setDefaultWhitespaceChars(' \t\n\r')
    document.parseWithTabs()

    def parse(s):
        return document.parseString(s)['top']

    return parse
def get_parser_atoms(self):
    """ Function defining the atoms of the grammar.

    Extends the atoms of the parent class with the assignment operators
    and the named constants, and returns the resulting mapping.
    """
    atoms = super(LanguageMathematica, self).get_parser_atoms()
    # '|' tries the alternatives in order and keeps the first that matches,
    # so "==" has to come before its own prefix "=" or it can never match.
    atoms['assign'] = (Literal("==") | Literal(":=") | Literal("="))
    atoms['consts'] = (Keyword("Pi").setParseAction(replaceWith('PI'))
                       | Keyword("E").setParseAction(replaceWith('E')))
    return atoms
def get_parser_atoms(self):
    """ Function defining the atoms of the grammar.

    Starts from the atoms of the parent class, then sets the ``assign``
    and ``consts`` entries before returning the mapping.
    """
    atoms = super(LanguageMathematica, self).get_parser_atoms()

    # The alternatives are tried left to right and the first match wins.
    # "=" is a prefix of "==", so the longer operator must be listed first
    # to be reachable at all.
    atoms["assign"] = Literal("==") | Literal(":=") | Literal("=")

    pi_const = Keyword("Pi").setParseAction(replaceWith("PI"))
    e_const = Keyword("E").setParseAction(replaceWith("E"))
    atoms["consts"] = pi_const | e_const
    return atoms
def jsParse(inStr):
    """Parse a JavaScript object literal and return the Python value.

    This is a context-free grammar parser for javascript object literals.
    It has to cope with a lot of the definitional messes found in
    in-the-wild javascript.  Javascript is far more tolerant than JSON
    here (bare identifiers as keys, elided array elements, trailing
    commas, comments), so python's ``json`` library cannot be used.

    :param inStr: source text of the literal
    :return: the last token produced for the parsed value
    """
    TRUE = pp.Keyword("true").setParseAction(pp.replaceWith(True))
    FALSE = pp.Keyword("false").setParseAction(pp.replaceWith(False))
    NULL = pp.Keyword("null").setParseAction(pp.replaceWith(None))

    # Use a copy: setting the action on pp.quotedString itself would change
    # the shared module-level element for every other user of it.
    jsonString = pp.quotedString.copy().setParseAction(pp.removeQuotes)
    jsonNumber = pp.Combine(
        pp.Optional('-')
        + ('0' | pp.Word('123456789', pp.nums))
        + pp.Optional('.' + pp.Word(pp.nums))
        + pp.Optional(pp.Word('eE', exact=1)
                      + pp.Word(pp.nums + '+-', pp.nums)))

    jsonObject = pp.Forward()
    jsonValue = pp.Forward()
    jsonDict = pp.Forward()
    jsonArray = pp.Forward()
    jsonElements = pp.Forward()

    # Bare identifiers are accepted as dictionary keys.
    rawText = pp.Regex('[a-zA-Z_$][0-9a-zA-Z_$]*')

    # A comma with nothing in front of it stands for an elided element.
    commaToNull = pp.Word(',,', exact=1).setParseAction(pp.replaceWith(None))
    jsonElements << (pp.ZeroOrMore(commaToNull)
                     + pp.Optional(jsonObject)
                     + pp.ZeroOrMore((pp.Suppress(',') + jsonObject)
                                     | commaToNull))

    jsonValue << (jsonString | jsonNumber | TRUE | FALSE | NULL)

    dictMembers = pp.delimitedList(
        pp.Group((rawText | jsonString)
                 + pp.Suppress(':')
                 + (jsonValue | jsonDict | jsonArray)))

    # Any number of trailing commas is tolerated before the closing brace.
    jsonDict << pp.Dict(pp.Suppress('{')
                        + pp.Optional(dictMembers)
                        + pp.ZeroOrMore(pp.Suppress(','))
                        + pp.Suppress('}'))
    jsonArray << pp.Group(pp.Suppress('[')
                          + pp.Optional(jsonElements)
                          + pp.Suppress(']'))

    jsonObject << (jsonValue | jsonDict | jsonArray)

    jsonComment = pp.cppStyleComment
    jsonObject.ignore(jsonComment)

    def convertDict(s, l, toks):
        return dict(toks.asList())

    def convertNumbers(s, l, toks):
        n = toks[0]
        try:
            return int(n)
        except ValueError:
            return float(n)

    jsonNumber.setParseAction(convertNumbers)
    jsonDict.setParseAction(convertDict)

    # The original also parsed the fixed text '"inStr"' here and threw the
    # result away; that call had no effect on the returned value.
    return jsonObject.parseString(inStr).pop()
def parse_string(self, string):
    '''Populate this object from the text of a Fileset resource.

    The grammar is built with pyparsing, imported locally.  Values are
    stored by the parse actions attached to each rule, so running the
    parser is what fills the object in.

    :param string: resource text; it has to be consumed completely
        (``parseAll=True``)
    :return: the text ``'Fileset: '`` followed by ``self[NAME]``
    '''
    from pyparsing import (Group, Keyword, Literal, OneOrMore, Regex, Word,
                           alphanums, nestedExpr, quotedString,
                           removeQuotes, replaceWith, restOfLine)

    gr_eq = Literal('=')
    gr_stripped_string = quotedString.copy().setParseAction(removeQuotes)
    gr_opt_quoted_string = gr_stripped_string | restOfLine

    gr_name = Keyword('name', caseless=True) + gr_eq + gr_opt_quoted_string
    gr_name.setParseAction(lambda x, y=self: y._set_name(x[2]))

    gr_yn = (Keyword('yes', caseless=True).setParseAction(replaceWith('1'))
             | Keyword('no', caseless=True).setParseAction(replaceWith('0')))

    gr_phrase = Group(OneOrMore(gr_stripped_string | Word(alphanums))
                      + gr_eq + gr_opt_quoted_string)

    # NOTE(review): the default action is `print`, which looks like a
    # debugging leftover; every call below passes an explicit action.
    def np(words, fn=gr_opt_quoted_string, action=print):
        '''Build ``(word1 | word2 | ...) = fn`` and attach ``action``.'''
        p = Keyword(words[0], caseless=True)
        for w in words[1:]:
            p = p | Keyword(w, caseless=True)
        p = p + gr_eq + fn
        p.setParseAction(action)
        return p

    gr_ifsc = np(PList('Ignore File Set Changes'), gr_yn,
                 action=self._parse_setter(IGNORECHANGES))
    gr_evss = np(PList('Enable VSS'), gr_yn,
                 action=self._parse_setter(VSSENABLED))

    # Raw string: '\}' is not a valid escape in an ordinary literal.  The
    # pattern handed to the regex engine is unchanged.
    gr_i_option = Group(
        Keyword(OPTIONS, caseless=True)
        + nestedExpr('{', '}', Regex(r'[^\}]+', re.MULTILINE)))
    gr_e_option = gr_i_option.copy()
    gr_i_file = gr_phrase.copy()
    gr_e_file = gr_phrase.copy()

    gr_inc = Keyword('include', caseless=True) + nestedExpr(
        '{', '}', OneOrMore(gr_i_option | gr_i_file))
    gr_inc.addParseAction(self._parse_add_entry)
    gr_exc = Keyword('exclude', caseless=True) + nestedExpr(
        '{', '}', OneOrMore(gr_e_option | gr_e_file))
    gr_exc.addParseAction(self._parse_add_entry)

    gr_res = OneOrMore(gr_name | gr_inc | gr_exc | gr_ifsc | gr_evss)

    # Parsing is done for its side effects; the token list is not needed.
    gr_res.parseString(string, parseAll=True)
    return 'Fileset: ' + self[NAME]
def parse_string(self, string):
    '''Fill this object in from the text of a Storage resource.

    pyparsing is imported locally and used to build a grammar of
    ``keyword = value`` lines; the parse action of each line stores the
    value on the object.

    :param string: resource text, which must be consumed completely
    :return: the text ``'Storage: '`` followed by ``self[NAME]``
    '''
    from pyparsing import (Suppress, Regex, quotedString, restOfLine,
                           Keyword, nestedExpr, Group, OneOrMore, Word,
                           Literal, alphanums, removeQuotes, replaceWith,
                           nums)

    equals = Literal('=')
    unquoted = quotedString.copy().setParseAction(removeQuotes)
    any_value = unquoted | restOfLine
    number = Word(nums)
    yes = Keyword('yes', caseless=True).setParseAction(replaceWith('1'))
    no = Keyword('no', caseless=True).setParseAction(replaceWith('0'))
    yes_no = yes | no

    def np(words, fn=any_value, action=None):
        '''Build ``(word1 | word2 | ...) = fn`` and attach ``action``.'''
        rule = Keyword(words[0], caseless=True)
        for word in words[1:]:
            rule = rule | Keyword(word, caseless=True)
        rule = rule + equals + fn
        rule.setParseAction(action)
        return rule

    # One rule per recognised directive, in matching order.
    rules = [
        np((NAME, ), action=lambda x: self._set_name(x[2])),
        np(PList('sd port'), number, action=self._parse_setter(SDPORT)),
        np((ADDRESS, ), action=self._parse_setter(ADDRESS)),
        np((PASSWORD, ), action=self._parse_setter(PASSWORD)),
        np((DEVICE, ), action=self._parse_setter(DEVICE)),
        np(PList('media type'), action=self._parse_setter(MEDIATYPE)),
        np(PList('auto changer'), yes_no,
           action=self._parse_setter(AUTOCHANGER)),
        np(PList('maximum concurrent jobs'), number,
           action=self._parse_setter(MAXIMUMCONCURRENTJOBS)),
        np(PList('allow compression'), yes_no,
           action=self._parse_setter(ALLOWCOMPRESSION)),
        np(PList('heartbeat interval'),
           action=self._parse_setter(HEARTBEATINTERVAL)),
    ]

    gr_line = rules[0]
    for rule in rules[1:]:
        gr_line = gr_line | rule

    gr_res = OneOrMore(gr_line)
    result = gr_res.parseString(string, parseAll=True)
    return 'Storage: ' + self[NAME]
def date(name=None, compulsory=False):
    """
    Builds the grammar for a Date (D) field: eight digits in the pattern
    YYYYMMDD, converted to a ``datetime.date``.

    Unless the field is compulsory, eight zeros or eight spaces are also
    accepted and yield None.

    :param name: name for the field
    :param compulsory: indicates if the empty date is disallowed
    :return: grammar for the date field
    """
    label = 'Date Field' if name is None else name

    def _to_date(tokens):
        return datetime.datetime.strptime(tokens[0], '%Y%m%d').date()

    # This regex allows values from 00000101 to 99991231
    field = pp.Regex('[0-9][0-9][0-9][0-9](0[1-9]|1[0-2])'
                     '(0[1-9]|[1-2][0-9]|3[0-1])')
    field.setParseAction(_to_date)
    field.setName(label)

    if not compulsory:
        # Both spellings of "no date" are turned into None.
        zeros = pp.Regex('[0]{8}')
        zeros.setParseAction(pp.replaceWith(None))
        zeros.setName(label)

        blanks = pp.Regex('[ ]{8}')
        blanks.setParseAction(pp.replaceWith(None))
        blanks.setName(label)

        field = field | zeros | blanks
        field.setName(label)

    # The field is positional, so leading white space is not skipped.
    field.leaveWhitespace()

    return field
def wrap_as_optional(self, field, name, columns):
    """
    Extends a field so that a run of exactly ``columns`` spaces is also
    accepted; that alternative yields None.

    :param field: the field to wrap
    :param name: name of the field
    :param columns: number of columns it takes
    :return: the field with an additional rule to allow empty strings
    """
    # Exactly `columns` spaces, matched without skipping white space.
    blank = pp.Regex('[ ]{' + str(columns) + '}')
    blank.setName(name)
    blank.leaveWhitespace()
    blank.setParseAction(pp.replaceWith(None))
    # Report the blank alternative under the same results name as the field.
    blank = blank.setResultsName(field.resultsName)

    wrapped = field | blank
    wrapped.setName(name)
    wrapped.leaveWhitespace()

    return wrapped
def parseString(s):
    """Parse one or more ``goto(...)`` commands from ``s``.

    Each command is ``goto`` (any case) followed by a parenthesised list
    of attributes, optionally separated by semicolons.  An attribute is
    either an element letter with an optional number or a target name.

    :param s: text to parse
    :return: the parse results, or the string "error" when a ValueError
        is raised while parsing
    """
    upper_chars = "ABCDEFGHIJKLMNOPRSTUVZYXWQ0123456789_-."
    lower_chars = upper_chars.lower()
    upper_elements = "ABCSXYZ"
    lower_elements = upper_elements.lower()
    digit_chars = "0123456789"

    goto = CaselessLiteral("goto")
    open_paren = Suppress("(")
    close_paren = Suppress(")")
    semicolon = Suppress(";")
    # A decimal comma is rewritten as a decimal point.
    comma = Literal(",").setParseAction(replaceWith("."))
    dot = "."
    minus = "-"

    element = Word(upper_elements, max=1) | Word(lower_elements, max=1)
    number = Word(digit_chars)
    integer = Optional(minus) + number
    floa = Combine(integer
                   + Optional(comma + number)
                   + Optional(dot + number))
    elementRef = element + Optional(floa)
    targetName = Word(upper_chars, max=13) | Word(lower_chars, max=13)
    attribute = elementRef | targetName

    go = Group(goto
               + open_paren
               + OneOrMore(attribute + Optional(semicolon))
               + close_paren)
    command = go + ZeroOrMore(go)

    # NOTE(review): only ValueError is handled here; confirm whether parse
    # failures are meant to propagate to the caller.
    try:
        return command.parseString(s)
    except ValueError:
        return "error"
def ipi_name_number(name=None, compulsory=False):
    """
    IPI Name Number field.

    An IPI Name Number is composed of eleven digits, for example
    00014107338.  Unless the field is compulsory, eleven spaces are also
    accepted and yield None.

    :param name: name for the field
    :param compulsory: indicates if the empty string is disallowed
    :return: a parser for the IPI Name Number field
    """
    label = 'IPI Name Number Field' if name is None else name

    field = basic.numeric(11, compulsory=compulsory)

    if not compulsory:
        # The blank alternative is tried before the numeric one.
        blank = pp.Regex('[ ]{11}')
        blank.setParseAction(pp.replaceWith(None))
        blank.setName(label)
        field = blank | field

    field.setName(label)

    return field.setResultsName('ipi_name_n')
def string(allow_private=False):
    """Build the grammar for a string containing ``${...}`` templates.

    The result matches any sequence of: ``$$`` (turned into ``$``), a
    template, or a run of printable characters and spaces without ``$``.

    :param allow_private: when true, identifiers may start with an
        underscore; the flag is also passed on to the expression grammar
    :return: the pyparsing element for the whole string
    """
    # A leading underscore is only accepted when private names are allowed.
    first_chars = pp.alphas + "_" if allow_private else pp.alphas
    identifier = pp.Word(first_chars, pp.alphanums + "_")

    template = pp.Forward()
    pool_var = identifier.copy()
    var = (_Parsers.builtins
           | pool_var.setParseAction(lambda name: _PoolVar(name))
           | template)
    special_chars = pp.Keyword('$$').setParseAction(pp.replaceWith('$'))

    # ${ var [.expression]* }
    call_chain = pp.ZeroOrMore(
        pp.Suppress('.') + _Parsers.expression(allow_private=allow_private))
    template << (pp.Suppress('$') + pp.Suppress('{')
                 + var + call_chain + pp.Suppress('}'))

    def template_parse_action(toks):
        # First token is the variable; everything after it is the chain.
        return _TemplateVar(toks[0], toks[1:])

    template.setParseAction(template_parse_action)

    restricted_chars = '$'
    plain_chars = ''.join(set(pp.printables) - set(restricted_chars))
    plain_text = pp.Combine(pp.Word(plain_chars + ' ')).leaveWhitespace()

    return pp.ZeroOrMore(special_chars | template | plain_text)
def audio_visual_key(name=None):
    """
    Builds the grammar for an Audio Visual Key code: a three-column
    numeric society code followed by a fifteen-column audio-visual number.

    This is a variation on the ISAN (International Standard Audiovisual
    Number).

    :param name: name for the field
    :return: grammar for an Audio Visual Key field
    """
    label = 'AVI Field' if name is None else name

    society_code = basic.numeric(3)
    society_code = society_code.setName('Society Code')
    society_code = society_code.setResultsName('society_code')

    # Fifteen spaces are accepted as an empty number and yield ''.
    blank_number = pp.Regex('[ ]{15}')
    blank_number.setParseAction(pp.replaceWith(''))

    av_number = basic.alphanum(15) | blank_number
    av_number = av_number.setName('Audio-Visual Number')
    av_number = av_number.setResultsName('av_number')

    field = pp.Group(society_code + av_number)
    field.setParseAction(lambda v: _to_avi(v[0]))
    field = field.setName(label)

    return field.setResultsName('audio_visual_key')
def parse_string(self, string):
    '''Populate this object from a Storage resource definition.

    The grammar is a repetition of ``keyword = value`` lines built with
    pyparsing (imported locally); values are stored by the parse action
    attached to each line.

    :param string: resource text, which must be consumed completely
    :return: the text ``'Storage: '`` followed by ``self[NAME]``
    '''
    from pyparsing import (Suppress, Regex, quotedString, restOfLine,
                           Keyword, nestedExpr, Group, OneOrMore, Word,
                           Literal, alphanums, removeQuotes, replaceWith,
                           nums)

    gr_eq = Literal('=')
    gr_stripped_string = quotedString.copy().setParseAction(removeQuotes)
    gr_opt_quoted_string = gr_stripped_string | restOfLine
    gr_number = Word(nums)
    gr_yn = (Keyword('yes', caseless=True).setParseAction(replaceWith('1'))
             | Keyword('no', caseless=True).setParseAction(replaceWith('0')))

    def np(words, fn=gr_opt_quoted_string, action=None):
        '''Build ``(word1 | word2 | ...) = fn`` and attach ``action``.'''
        head, tail = words[0], words[1:]
        p = Keyword(head, caseless=True)
        for w in tail:
            p = p | Keyword(w, caseless=True)
        p = p + gr_eq + fn
        p.setParseAction(action)
        return p

    # Alternatives are listed in the order they are tried.
    gr_line = (
        np((NAME,), action=lambda x: self._set_name(x[2]))
        | np(PList('sd port'), gr_number,
             action=self._parse_setter(SDPORT))
        | np((ADDRESS,), action=self._parse_setter(ADDRESS))
        | np((PASSWORD,), action=self._parse_setter(PASSWORD))
        | np((DEVICE,), action=self._parse_setter(DEVICE))
        | np(PList('media type'), action=self._parse_setter(MEDIATYPE))
        | np(PList('auto changer'), gr_yn,
             action=self._parse_setter(AUTOCHANGER))
        | np(PList('maximum concurrent jobs'), gr_number,
             action=self._parse_setter(MAXIMUMCONCURRENTJOBS))
        | np(PList('allow compression'), gr_yn,
             action=self._parse_setter(ALLOWCOMPRESSION))
        | np(PList('heartbeat interval'),
             action=self._parse_setter(HEARTBEATINTERVAL))
    )

    gr_res = OneOrMore(gr_line)
    result = gr_res.parseString(string, parseAll=True)
    return 'Storage: ' + self[NAME]
def Syntax():
    """Build and return the grammar for propositional formulas.

    Operands are words made of the letters p, q and r, or a parenthesised
    expression; operator symbols are replaced by their names.
    """
    def named(symbol, label):
        # A literal whose token is replaced by the given name.
        return Literal(symbol).setParseAction(replaceWith(label))

    imp = named(u'⇒', "Implies")
    conj = named(u'∧', "Conjunction")
    disj = named(u'∨', "Disjunction")
    # NOTE(review): the biconditional is reported as "Disjunction", the same
    # label as '∨' -- looks like a copy/paste slip; confirm intended name.
    iff = named(u'⇔', "Disjunction")
    neg = named(u'¬', "Negation")

    op = imp | conj | disj | iff
    lpar = Literal('(').suppress()
    rpar = Literal(')').suppress()
    prop = Word(u"pqr")

    expr = Forward()
    atom = prop | Group(lpar + expr + rpar)
    binary_chain = atom + ZeroOrMore(op + expr)
    negation = Group(neg + expr)
    expr << (binary_chain | negation)
    return expr
def parser(env):
    """Given an environment, return an s-expression parser that resolves
    literals and environment variables.

    The defined literals are

    * integer and float numbers
    * true and false to represent Python's True and False
    * null to represent Python's None
    * strings that are not environment keys
    """
    def lookup(tokens):
        return env[tokens[0]]

    number = pp.pyparsing_common.number
    true = pp.Keyword("true").setParseAction(pp.replaceWith(True))
    false = pp.Keyword("false").setParseAction(pp.replaceWith(False))
    null = pp.Keyword("null").setParseAction(pp.replaceWith(None))
    # Environment keys are replaced by the value bound to them.
    envval = pp.oneOf(env.keys()).setParseAction(lookup)
    char = pp.Word(pp.alphas + "_")

    # Order matters: the first alternative that matches is used.
    atom = number | true | false | null | envval | char
    return pp.nestedExpr(content=pp.OneOrMore(atom))
def parse_string(self, string):
    '''Populate this object from the text of a Client resource.

    The grammar is built with pyparsing, imported locally.  Each
    recognised ``keyword = value`` line stores its value through a parse
    action, so running the parser is what fills the object in.

    :param string: resource text; it has to be consumed completely
        (``parseAll=True``)
    :return: the text ``'Client: '`` followed by ``self[NAME]``
    '''
    from pyparsing import (quotedString, restOfLine, Keyword, nestedExpr,
                           OneOrMore, Word, Literal, removeQuotes, nums,
                           replaceWith, printables)

    gr_eq = Literal('=')
    gr_stripped_string = quotedString.copy().setParseAction(removeQuotes)
    gr_opt_quoted_string = gr_stripped_string | restOfLine
    gr_number = Word(nums)
    gr_yn = (Keyword('yes', caseless=True).setParseAction(replaceWith('1'))
             | Keyword('no', caseless=True).setParseAction(replaceWith('0')))

    def _handle_ip(*x):
        # x is (string, location, tokens); tokens are keyword, '=', body.
        a, b, c = x[2]
        return ' %s = { %s }' % (a, c[0])

    def _handle_fdaddr(*x):
        a, b, c = x[2]
        self._set(FDADDRESSES, ' %s' % '\n '.join(c))
        return

    def np(words, fn=gr_opt_quoted_string, action=None):
        '''Build ``(word1 | word2 | ...) = fn`` and attach ``action``.'''
        p = Keyword(words[0], caseless=True)
        for w in words[1:]:
            p = p | Keyword(w, caseless=True)
        p = p + gr_eq + fn
        p.setParseAction(action)
        return p

    gr_line = np((NAME,), action=lambda x: self._set_name(x[2]))
    gr_line = gr_line | np((ADDRESS,), action=self._parse_setter(ADDRESS))
    gr_line = gr_line | np(
        (CATALOG,), action=self._parse_setter(CATALOG_ID, dereference=True))
    gr_line = gr_line | np((PASSWORD,), action=self._parse_setter(PASSWORD))
    gr_line = gr_line | np(PList('file retention'),
                           action=self._parse_setter(FILERETENTION))
    gr_line = gr_line | np(PList('job retention'),
                           action=self._parse_setter(JOBRETENTION))
    gr_line = gr_line | np((PRIORITY,), gr_number,
                           action=self._parse_setter(PRIORITY))
    gr_line = gr_line | np(PList('working directory'),
                           action=self._parse_setter(WORKINGDIRECTORY))
    gr_line = gr_line | np(PList('pid directory'),
                           action=self._parse_setter(PIDDIRECTORY))
    gr_line = gr_line | np(PList('heart beat interval'),
                           action=self._parse_setter(HEARTBEATINTERVAL))
    gr_line = gr_line | np(PList('fd address'),
                           action=self._parse_setter(FDADDRESS))
    gr_line = gr_line | np(PList('fd source address'),
                           action=self._parse_setter(FDSOURCEADDRESS))
    gr_line = gr_line | np(PList('pki key pair'),
                           action=self._parse_setter(PKIKEYPAIR))
    gr_line = gr_line | np(PList('pki master key'),
                           action=self._parse_setter(PKIMASTERKEY))
    gr_line = gr_line | np(PList('fd port'), gr_number,
                           action=self._parse_setter(FDPORT))
    gr_line = gr_line | np(PList('auto prune'), gr_yn,
                           action=self._parse_setter(AUTOPRUNE))
    # This used to store into FDPORT, so a "maximum concurrent jobs" line
    # silently overwrote the port.  It now has its own key.
    gr_line = gr_line | np(PList('maximum concurrent jobs'), gr_number,
                           action=self._parse_setter(MAXIMUMCONCURRENTJOBS))
    gr_line = gr_line | np(PList('pki encryption'), gr_yn,
                           action=self._parse_setter(PKIENCRYPTION))
    gr_line = gr_line | np(PList('pki signatures'), gr_yn,
                           action=self._parse_setter(PKISIGNATURES))

    # This is a complicated one: nested address blocks.
    da_addr = np(('Addr', 'Port'), Word(printables),
                 lambda x, y, z: ' '.join(z))
    da_ip = np(
        ('IPv4', 'IPv6', 'IP'),
        nestedExpr('{', '}',
                   OneOrMore(da_addr).setParseAction(
                       lambda x, y, z: ' ; '.join(z)))
    ).setParseAction(_handle_ip)
    da_addresses = np(('fd addresses', FDADDRESSES),
                      nestedExpr('{', '}', OneOrMore(da_ip)),
                      _handle_fdaddr)

    gr_res = OneOrMore(gr_line | da_addresses)

    # Parsing is done for its side effects; the token list is not needed.
    gr_res.parseString(string, parseAll=True)
    return 'Client: ' + self[NAME]
def get_parser_atoms(self):
    """ Function defining the atoms of the grammar.

    Starts from the atoms of the parent class and overrides the entries
    for exponentiation and constants; when ``self.int2float`` is set the
    entry for floats is overridden too.
    """
    atoms = super(LanguagePython, self).get_parser_atoms()

    atoms["exp"] = Literal("**").setParseAction(replaceWith("^"))

    pi_const = Keyword("np.pi").setParseAction(replaceWith("PI"))
    e_const = Keyword("np.e").setParseAction(replaceWith("E"))
    atoms["consts"] = pi_const | e_const

    if self.int2float:
        point = Literal(".")
        e = CaselessKeyword("E")
        # A run of sign/digit characters gets a "." appended to it.
        dotted_int = Word("+-" + nums, nums).setParseAction(
            appendString("."))
        decimal = Combine(
            Word("+-" + nums, nums)
            + Optional(point + Optional(Word(nums)))
            + Optional(e + Word("+-" + nums, nums)))
        atoms["float"] = dotted_int | decimal

    return atoms
def get_parser_atoms(self):
    """ Function defining the atoms of the grammar.

    The parent's atoms are extended with the ``exp`` and ``consts``
    entries, plus ``float`` when ``self.int2float`` is true.
    """
    atoms = super(LanguagePython, self).get_parser_atoms()

    power = Literal("**")
    power.setParseAction(replaceWith('^'))
    atoms['exp'] = power

    atoms['consts'] = (
        Keyword('np.pi').setParseAction(replaceWith('PI'))
        | Keyword('np.e').setParseAction(replaceWith('E'))
    )

    if not self.int2float:
        return atoms

    point = Literal(".")
    e = CaselessKeyword("E")
    # First alternative: sign/digit run with a "." appended to the token.
    as_float = Word("+-" + nums, nums).setParseAction(appendString('.'))
    # Second alternative: optional fraction and optional exponent.
    full_float = Combine(Word("+-" + nums, nums)
                         + Optional(point + Optional(Word(nums)))
                         + Optional(e + Word("+-" + nums, nums)))
    atoms['float'] = as_float | full_float
    return atoms
def _make_valid_state_name(self, state_name):
    """Transform the input state_name into a valid state in XMLBIF.

    XMLBIF states must start with a letter and only contain letters,
    numbers and underscores.  Every run of other characters is replaced
    by a single underscore, and ``"state"`` is prepended when the result
    does not start with a letter.

    :param state_name: any object; its ``str()`` form is used
    :return: the sanitised name; an empty input yields ``"state"``
    """
    s = str(state_name)
    s_fixed = pp.CharsNotIn(pp.alphanums + "_").setParseAction(
        pp.replaceWith("_")).transformString(s)
    # Check for emptiness first: indexing an empty string raises IndexError.
    if not s_fixed or not s_fixed[0].isalpha():
        s_fixed = "state" + s_fixed
    return s_fixed
def iswc(name=None, compulsory=False):
    """
    ISWC field.

    An ISWC code written on a field follows the pattern TNNNNNNNNNC:

    - T: header, it is always T.
    - N: numeric value.
    - C: control digit.

    For example, T0345246801.  Unless the field is compulsory, eleven
    spaces are also accepted and yield None.

    :param name: name for the field
    :param compulsory: indicates if the empty string is disallowed
    :return: a parser for the ISWC field
    """
    label = 'ISWC Field' if name is None else name

    # The header is always T and is not kept in the tokens.
    header = pp.Literal('T').suppress()
    header = header.setName('ISWC Header').setResultsName('header')

    # Nine digits of identifier followed by one check digit.
    id_code = basic.numeric(9, compulsory=True)
    id_code = id_code.setName('ID Code').setResultsName('id_code')

    check_digit = basic.numeric(1, compulsory=True)
    check_digit = check_digit.setName('Check Digit')
    check_digit = check_digit.setResultsName('check_digit')

    field = pp.Group(header + id_code + check_digit)
    field.setParseAction(lambda c: _to_iswccode(c[0]))
    field.setName(label)

    if not compulsory:
        # The blank alternative is tried before the code itself.
        blank = pp.Regex('[ ]{11}')
        blank.setParseAction(pp.replaceWith(None))
        blank.setName(label)
        field = blank | field
        field.setName(label)

    # The field is positional, so leading white space is not skipped.
    field.leaveWhitespace()

    return field.setResultsName('iswc')
def __init__(self):
    """Build the pattern grammar and store it as ``self.pattern``.

    A pattern is a label, followed by any number of positional
    arguments, followed by any number of ``key=value`` options.
    """
    false_kw = Keyword("false").setParseAction(replaceWith(False))
    null_kw = Keyword("null").setParseAction(replaceWith(None))
    true_kw = Keyword("true").setParseAction(replaceWith(True))

    pattern = Forward()
    label = Word(alphas, alphanums + "_").setResultsName("layer_name")
    # A nested expression whose content is itself a pattern.
    configurable_param = nestedExpr(content=pattern)

    # A bare word only counts as an argument when no "=" follows it;
    # otherwise it is the key of an option.
    bare_word = Word(alphanums + "*_") + ~Word("=")
    arg = (null_kw ^ false_kw ^ true_kw ^ pyparsing_common.number
           ^ bare_word ^ configurable_param)

    args = arg[...].setResultsName("args")
    args.setParseAction(self.convert_list)

    option = Group(Word(alphanums + "_") + Suppress("=") + arg)
    options = Dict(option)[...].setResultsName("options")
    options.setParseAction(self.convert_dict)

    pattern <<= label + args + options
    pattern.setParseAction(Pattern)
    self.pattern = pattern
def F(tokens):
    """Return ``rhs`` with each name in ``args`` replaced by its token.

    ``rhs`` and ``args`` are not parameters; they are read from the
    surrounding scope.  The replacement values come from ``tokens.args``
    when present, otherwise from ``arg1``/``arg2`` (and ``arg3`` when
    present).
    """
    if 'args' in tokens:
        # Fun Prefix Postfix Bifix
        targs = tokens.args
    elif 'arg1' in tokens and 'arg2' in tokens:
        # Infix3 when a third argument is present, otherwise Infix
        targs = ((tokens.arg1, tokens.arg2, tokens.arg3)
                 if 'arg3' in tokens
                 else (tokens.arg1, tokens.arg2))

    exp = rhs
    # Substitute the arguments one after another.
    for arg, targ in zip(args, targs):
        substitution = pp.Literal(arg).setParseAction(pp.replaceWith(targ))
        exp = substitution.transformString(exp)
    return exp
def parser_factory(styler):
    """Builds the S-expression parser.

    :param styler: callable taking a style class string and an
        expression and returning an expression; when it is truthy the
        closing quote and closing parenthesis become optional
    :return: the grammar element for a single form
    """
    def cond_optional(expr):
        if styler:
            return pp.Optional(expr)
        return expr

    lpar, rpar, squo, dquo = map(pp.Suppress, '()\'"')

    form_first = pp.Forward()
    form = pp.Forward()

    nil = pp.CaselessKeyword('nil').addParseAction(pp.replaceWith([]))
    t = pp.CaselessKeyword('t').addParseAction(pp.replaceWith(True))
    constant = styler('class:constant', nil | t)

    number = styler('class:number', ppc.number).setName('number')

    # A symbol is any run of characters that are neither control
    # characters nor one of the listed delimiters.
    control_chars = ''.join(map(chr, range(0, 32))) + '\x7f'
    symbol = pp.CharsNotIn(control_chars + '\'"`;,()[]{} ')
    symbol = styler('class:symbol', symbol).setName('symbol')
    symbol.addParseAction(lambda t: Symbol(t[0]))
    call = styler('class:call', symbol)

    string = (dquo
              + pp.Combine(pp.Optional(pp.CharsNotIn('"')))
              + cond_optional(dquo))
    string = styler('class:string', string).setName('string')

    forms = (form_first + pp.ZeroOrMore(form)).setName('one or more forms')

    sexp = lpar + pp.Optional(forms) + cond_optional(rpar)
    sexp = sexp.setName('s-expression')
    sexp.addParseAction(lambda t: [list(t)])

    quote = (styler('class:quote', squo) + form).setName('quoted form')
    quote.addParseAction(lambda t: Quote(t[0]))

    # '^' binds tighter than '|', so the number/symbol pair is resolved by
    # longest match inside an otherwise first-match list of alternatives.
    form_first <<= constant | (number ^ call) | string | sexp | quote
    form <<= constant | (number ^ symbol) | string | sexp | quote

    return form
def convert_to_literal(tok, val):
    """
    Builds a caseless literal for a token that yields the given value
    instead of the matched text.

    :param tok: the token we want to find/replace
    :type tok: str
    :param val: the integer the token represented
    :type val: int
    :return: pyparsing caseless literal
    :rtype: pyparsing.CaselessLiteral
    """
    literal = CaselessLiteral(tok)
    named = literal.setName(tok)
    return named.setParseAction(replaceWith(val))
def make_arithmetic(self, s, l, tokens):
    """Parse ``tokens[0]`` as an arithmetic expression.

    The decimal and thousands separators and the namespace separator are
    taken from ``self._grammar``.

    :param s: not used in this method
    :param l: not used in this method
    :param tokens: sequence whose first item is the expression text
    :return: an ``Arithmetic`` built from the parsed expression
    """
    digits = Word(nums)
    variable = Word(alphas + self._grammar.get_token("namespace_separator"))

    def to_dot(t):
        return "."

    # The configured decimal separator is normalised to ".".
    decimal_sep = Literal(self._grammar.get_token("decimal_separator"))
    decimal_sep.setParseAction(to_dot)

    # Thousands separators are dropped from the number.
    thousands_sep = Suppress(self._grammar.get_token("thousands_separator"))
    leading_group = Word(nums, max=3)
    following_groups = OneOrMore(thousands_sep + Word(nums, exact=3))
    thousands = leading_group + following_groups

    integers = thousands | digits
    decimals = decimal_sep + digits

    expop = Literal('^')
    signop = oneOf('+ -')
    multop = oneOf('* /')
    plusop = oneOf('+ -')
    modop = Literal('%')

    operand = Combine((integers + Optional(decimals)) | variable)

    # Operator levels as (operator, arity, associativity), in order.
    open_bracket = Literal("[").setParseAction(replaceWith("("))
    close_bracket = Literal("]").setParseAction(replaceWith(")"))
    levels = [
        (open_bracket, 1, opAssoc.RIGHT),
        (close_bracket, 1, opAssoc.LEFT),
        ("!", 1, opAssoc.LEFT),
        (expop, 2, opAssoc.RIGHT),
        (signop, 1, opAssoc.RIGHT),
        (multop, 2, opAssoc.LEFT),
        (modop, 2, opAssoc.LEFT),
        (plusop, 2, opAssoc.LEFT),
    ]
    expr = operatorPrecedence(operand, levels)

    parsed = expr.parseString(tokens[0], parseAll=True)
    return Arithmetic(parsed[0], self._namespace,
                      self._grammar.get_token("namespace_separator"))
def _get_parser():
    """Build and return the grammar for one line of text.

    A line is one or more items up to the end of the string.  An item is
    a reference (delimited by _REF_OPEN / _REF_CLOSE and allowed to nest),
    an export (delimited by _INV_OPEN / _INV_CLOSE), or a plain string.
    The delimiter and escape constants are defined outside this function.
    """
    # A double escape directly in front of any opening or closing
    # delimiter is replaced by _ESCAPE.
    double_escape = pp.Combine(pp.Literal(_DOUBLE_ESCAPE) + pp.MatchFirst([pp.FollowedBy(_REF_OPEN), pp.FollowedBy(_REF_CLOSE),
                               pp.FollowedBy(_INV_OPEN), pp.FollowedBy(_INV_CLOSE)])).setParseAction(pp.replaceWith(_ESCAPE))

    # --- references ---
    ref_open = pp.Literal(_REF_OPEN).suppress()
    ref_close = pp.Literal(_REF_CLOSE).suppress()
    # Negative lookaheads: match only where no (escaped) delimiter starts.
    ref_not_open = ~pp.Literal(_REF_OPEN) + ~pp.Literal(_REF_ESCAPE_OPEN) + ~pp.Literal(_REF_DOUBLE_ESCAPE_OPEN)
    ref_not_close = ~pp.Literal(_REF_CLOSE) + ~pp.Literal(_REF_ESCAPE_CLOSE) + ~pp.Literal(_REF_DOUBLE_ESCAPE_CLOSE)
    # An escaped delimiter is replaced by the bare delimiter text.
    ref_escape_open = pp.Literal(_REF_ESCAPE_OPEN).setParseAction(pp.replaceWith(_REF_OPEN))
    ref_escape_close = pp.Literal(_REF_ESCAPE_CLOSE).setParseAction(pp.replaceWith(_REF_CLOSE))
    ref_text = pp.CharsNotIn(_REF_EXCLUDES) | pp.CharsNotIn(_REF_CLOSE_FIRST, exact=1)
    ref_content = pp.Combine(pp.OneOrMore(ref_not_open + ref_not_close + ref_text))
    ref_string = pp.MatchFirst([double_escape, ref_escape_open, ref_escape_close, ref_content]).setParseAction(_string)
    # ref_item is a Forward so that a reference can contain references.
    ref_item = pp.Forward()
    ref_items = pp.OneOrMore(ref_item)
    reference = (ref_open + pp.Group(ref_items) + ref_close).setParseAction(_reference)
    ref_item << (reference | ref_string)

    # --- exports ---
    inv_open = pp.Literal(_INV_OPEN).suppress()
    inv_close = pp.Literal(_INV_CLOSE).suppress()
    inv_not_open = ~pp.Literal(_INV_OPEN) + ~pp.Literal(_INV_ESCAPE_OPEN) + ~pp.Literal(_INV_DOUBLE_ESCAPE_OPEN)
    inv_not_close = ~pp.Literal(_INV_CLOSE) + ~pp.Literal(_INV_ESCAPE_CLOSE) + ~pp.Literal(_INV_DOUBLE_ESCAPE_CLOSE)
    inv_escape_open = pp.Literal(_INV_ESCAPE_OPEN).setParseAction(pp.replaceWith(_INV_OPEN))
    inv_escape_close = pp.Literal(_INV_ESCAPE_CLOSE).setParseAction(pp.replaceWith(_INV_CLOSE))
    inv_text = pp.CharsNotIn(_INV_CLOSE_FIRST)
    # Only the closing side is guarded here; exports contain strings only.
    inv_content = pp.Combine(pp.OneOrMore(inv_not_close + inv_text))
    inv_string = pp.MatchFirst([double_escape, inv_escape_open, inv_escape_close, inv_content]).setParseAction(_string)
    inv_items = pp.OneOrMore(inv_string)
    export = (inv_open + pp.Group(inv_items) + inv_close).setParseAction(_invquery)

    # --- plain text outside references and exports ---
    text = pp.CharsNotIn(_EXCLUDES) | pp.CharsNotIn('', exact=1)
    content = pp.Combine(pp.OneOrMore(ref_not_open + inv_not_open + text))
    string = pp.MatchFirst([double_escape, ref_escape_open, inv_escape_open, content]).setParseAction(_string)

    item = reference | export | string
    # StringEnd makes the grammar cover the complete input.
    line = pp.OneOrMore(item) + pp.StringEnd()
    return line
def char_code(columns, name=None, compulsory=False):
    """
    Character set code field.

    Accepts one of the character sets returned by
    ``_tables.character_sets()`` or a ``U+0...`` code followed by spaces.
    The matched text is returned stripped.  Unless the field is
    compulsory, ``columns`` spaces are also accepted and yield None.

    :param columns: number of columns the field takes
    :param name: name for the field
    :param compulsory: indicates if the empty string is disallowed
    :return: an instance of the Character set code field rules
    :raises ValueError: if ``columns`` is not positive
    """
    if name is None:
        name = 'Char Code Field (' + str(columns) + ' columns)'

    if columns <= 0:
        # ValueError is still caught by handlers written for the bare
        # BaseException raised before, and it now says what went wrong.
        raise ValueError('columns must be positive, got %r' % (columns,))

    # One alternative per known character set, left-padded with spaces.
    # NOTE(review): the padding is computed against a fixed width of 15
    # while the other alternatives use `columns` -- confirm intended.
    char_sets = '|'.join(
        '[ ]{' + str(15 - len(char_set)) + '}' + char_set
        for char_set in _tables.character_sets())

    # Accepted sets
    _character_sets = pp.Regex(char_sets)
    # Raw strings: '\+' is not a valid escape in an ordinary literal.  The
    # patterns handed to the regex engine are unchanged.
    _unicode_1_16b = pp.Regex(
        r'U\+0[0-8,A-F]{3}[ ]{' + str(columns - 6) + '}')
    _unicode_2_21b = pp.Regex(
        r'U\+0[0-8,A-F]{4}[ ]{' + str(columns - 7) + '}')

    # Basic field
    char_code_field = (_character_sets | _unicode_1_16b | _unicode_2_21b)

    # Parse action
    char_code_field = char_code_field.setParseAction(lambda s: s[0].strip())

    # Name
    char_code_field.setName(name)

    if not compulsory:
        char_code_field_empty = pp.Regex('[ ]{' + str(columns) + '}')
        char_code_field_empty.setName(name)
        char_code_field_empty.leaveWhitespace()
        char_code_field_empty.setParseAction(pp.replaceWith(None))

        char_code_field = char_code_field | char_code_field_empty
        char_code_field.setName(name)

    return char_code_field
def __init__(self):
    """Set up the codec, the comment/blank-line strippers and the file grammar.

    The file grammar (``self.filegrammar``) maps each ``[identifier]``
    section head to the concatenated text of the lines that follow it.
    Section-specific grammars are created by ``_init_sectiongrammars``.
    """
    # codec used for encoding of usermessages
    self.codec = lightpile.codec

    line_end = pp.LineEnd()
    line_start = pp.LineStart().leaveWhitespace()

    # A '#' comment up to the end of line collapses to a bare newline,
    # an empty line collapses to nothing.
    empty_line = line_start + line_end
    hash_comment = '#' + pp.restOfLine + line_end
    self.comment_stripper = hash_comment.setParseAction(
        pp.replaceWith("\n"))
    self.blankline_stripper = empty_line.setParseAction(
        pp.replaceWith(""))

    # filegrammar
    # NOTE(review): ws, standard_chars and text are built here but not
    # referenced again in this method.
    ws = ' \t'
    standard_chars = pp.printables.replace('#', '')
    text = pp.OneOrMore(
        pp.White(ws) | pp.quotedString | pp.Word(standard_chars))
    text.setParseAction(lambda tokens: ''.join(tokens))

    open_bracket = pp.Literal("[").suppress()
    close_bracket = pp.Literal("]").suppress()

    # identifiers have to start with a unicode-letter (or underscore) and
    # can continue with any number of word characters or any of
    # ':', '%', '+', '-', '_', '.', ','
    self.identifier = pp.Regex(r'[^\W\d]([^\W]|[%:_,\+\-\.])*', re.U)

    section_head = (open_bracket + self.identifier + close_bracket
                    + line_end.suppress())
    # A section line is any line that neither starts a new section nor
    # sits at the end of the input.
    section_line = (~section_head + ~pp.StringEnd()
                    + pp.restOfLine + line_end)
    section_body = pp.ZeroOrMore(section_line)
    section_body.setParseAction(lambda tokens: ''.join(tokens))

    self.filegrammar = pp.dictOf(section_head, section_body)
    self._init_sectiongrammars()
def compiled_sql(self):
    """Parse ``self.pre_query`` and return the resulting tokens.

    The query language consists of where-clauses of the form
    ``table[>>>table...].column OP value`` combined with ``&`` (emitted
    as ``INTERSECT``) and ``|`` (emitted as ``UNION``), optionally grouped
    with parentheses. Every where-clause is replaced by the result of
    ``self._make_atomic``.

    The joined tokens are also printed to stdout.

    :return: the pyparsing results for the parsed query
    """
    where_op = oneOf('> >= < <= = IN !=') | Literal("NOT IN")
    number = Word(nums)
    # Quoted strings are re-emitted wrapped in double quotes
    string = (QuotedString(quoteChar="'") | QuotedString(
        quoteChar='"')).setParseAction(lambda x: '"' + x[0] + '"')
    where_val = number | string
    lpar = Literal('(')
    rpar = Literal(')')

    # "a>>>b>>>c" becomes one group of table names
    tables = Group(
        Word(alphas) + ZeroOrMore(Suppress(Literal('>>>')) + Word(alphas)))
    tbl_col = tables + Suppress(Literal('.')) + Word(alphas)

    where_clause = tbl_col + where_op + where_val
    where_clause.setParseAction(lambda x: self._make_atomic(
        tables=x[0], col_name=x[1], op=x[2], value=x[3]))

    expr = Forward()
    atom = where_clause | (lpar + expr + rpar)
    op = Literal("&").setParseAction(
        replaceWith("INTERSECT")) | Literal("|").setParseAction(
        replaceWith("UNION"))

    # '<<' binds tighter than '|', so the whole alternation must be
    # parenthesised; otherwise only the first alternative is attached to
    # the Forward and the second is silently thrown away.
    expr << ((atom + ZeroOrMore(op + expr)) | (
        Suppress("(") + atom + ZeroOrMore(op + expr) + Suppress(")")))

    results = expr.parseString(self.pre_query)
    print(' '.join(results))
    return results
def adblock_content(self, lurl):
    """Load ``lurl`` in the browser and reduce the page to text.

    The page HTML is passed through ``self.apply_css_sel``, then HTML
    comments are removed and common HTML entities decoded. The
    ``script``, ``iframe``, ``style`` and ``noscript`` tags are handed to
    ``self.trans_tag`` with a remove-text action, ``a``/``h``/``p`` tags
    are rewritten by ``self.change_a_tag`` / ``self.change_ph_tag``, all
    remaining tags are stripped, and runs of newlines are squeezed to
    one blank line.

    :param lurl: URL of the page to load
    :return: the transformed text
    """
    url_dom = urlparse(lurl).hostname
    # TODO add option stimeout
    self.browser.load(lurl, load_timeout=120, tries=3)

    # soup = self.browser.soup.encode('utf-8')
    # with that kind of parsing the encoding gets broken
    # SOLUTION: go through QString, which we convert to unicode
    html_str = unicode(self.browser.webframe.toHtml().toUtf8(),
                       encoding="UTF-8")
    html_str = self.apply_css_sel(html_str, url_dom)

    removetext = pyparsing.replaceWith("")

    # Work on copies: setParseAction() mutates the expression it is
    # called on, and these are module-level objects shared by every
    # other user of pyparsing in the process.
    html_comment = pyparsing.htmlComment.copy().setParseAction(removetext)
    html_entity = pyparsing.commonHTMLEntity.copy().setParseAction(
        pyparsing.replaceHTMLEntity)
    text_str = (html_comment | html_entity).transformString(html_str)
    # text_str = self.apply_css_sel(text_str, url_dom)

    for tag in ["script", "iframe", "style", "noscript"]:
        text_str = self.trans_tag(text_str, tag, removetext)

    anytag = pyparsing.anyOpenTag.copy().setParseAction(removetext)
    anyclose = pyparsing.anyCloseTag.copy().setParseAction(removetext)

    # replace the tags that carry links
    text_str = self.trans_tag(text_str, "a", self.change_a_tag)
    # h and p tags
    text_str = self.trans_tag(text_str, "h", self.change_ph_tag)
    text_str = self.trans_tag(text_str, "p", self.change_ph_tag)

    # strip whatever tags are left
    text_str = (anytag | anyclose).transformString(text_str)

    repeatednewlines = pyparsing.LineEnd() + pyparsing.OneOrMore(
        pyparsing.LineEnd())
    repeatednewlines.setParseAction(pyparsing.replaceWith("\n\n"))
    text_str = repeatednewlines.transformString(text_str)
    # print("res:", text.encode('utf-8'))
    return text_str
def kw_one_of(*words, replace=None):
    """
    Creates an expression of multiple caseless keywords combined with
    bitwise OR operators.

    If `replace` is specified, the parse action of the combined expression
    is set so that a matched keyword is replaced with `replace`.

    Raises TypeError when no word is given.
    """
    if not words:
        raise TypeError("Arguments must include at least one word.")

    keywords = [pp.CaselessKeyword(word) for word in words]

    # Fold left to right: ((w0 | w1) | w2) | ...
    combined = keywords[0]
    for keyword in keywords[1:]:
        combined = combined | keyword

    if replace is None:
        return combined
    return combined.setParseAction(pp.replaceWith(replace))
def parse_string(self, string):
    '''Populate this object from the text of a Pool resource.

    The text is a sequence of ``keyword = value`` directives. Each
    recognised directive triggers a parse action: the name directive
    calls ``self._set_name`` and every other one calls the setter built
    by ``self._parse_setter`` for its key. The whole string must be
    consumed (``parseAll=True``).

    Parsing is delegated to the pyparsing library.

    :param string: the resource text to parse
    :return: ``'Pool: '`` followed by the parsed name
    '''
    from pyparsing import (quotedString, restOfLine, Keyword, OneOrMore,
                           Word, Literal, removeQuotes, replaceWith, nums)

    gr_eq = Literal('=')
    gr_stripped_string = quotedString.copy().setParseAction(removeQuotes)
    gr_opt_quoted_string = gr_stripped_string | restOfLine
    gr_number = Word(nums)
    yes = Keyword('yes', caseless=True).setParseAction(replaceWith('1'))
    no = Keyword('no', caseless=True).setParseAction(replaceWith('0'))
    gr_yn = yes | no

    def np(words, fn=gr_opt_quoted_string, action=None):
        # "<any spelling in words> = <fn>", with `action` as parse action
        matcher = Keyword(words[0], caseless=True)
        for spelling in words[1:]:
            matcher = matcher | Keyword(spelling, caseless=True)
        directive = matcher + gr_eq + fn
        directive.setParseAction(action)
        return directive

    setter = self._parse_setter

    # Order matters: alternatives are tried first to last.
    directives = [
        np((NAME,), action=lambda x: self._set_name(x[2])),
        np(PList('pool type'), action=setter(POOLTYPE)),
        np(PList('maximum volumes'), action=setter(MAXIMUMVOLUMES)),
        np((STORAGE,), action=setter(STORAGE)),
        np(PList('use volume once'), gr_yn, action=setter(USEVOLUMEONCE)),
        np(PList('catalog files'), gr_yn, action=setter(CATALOGFILES)),
        np(PList('auto prune'), gr_yn, action=setter(AUTOPRUNE)),
        np((RECYCLE,), gr_yn, action=setter(RECYCLE)),
        np(PList('recycle oldest volume'), gr_yn,
           action=setter(RECYCLEOLDESTVOLUME)),
        np(PList('recycle current volume'), gr_yn,
           action=setter(RECYCLECURRENTVOLUME)),
        np(PList('purge oldest volume'), gr_yn,
           action=setter(PURGEOLDESTVOLUME)),
        np(PList('maximum volume jobs'), gr_number,
           action=setter(MAXIMUMVOLUMEJOBS)),
        np(PList('maximum volume files'), gr_number,
           action=setter(MAXIMUMVOLUMEFILES)),
        np(PList('maximum volume bytes'), action=setter(MAXIMUMVOLUMEBYTES)),
        np(PList('volume use duration'), action=setter(VOLUMEUSEDURATION)),
        np(PList('volume retention'), action=setter(VOLUMERETENTION)),
        np(PList('action on purge'), action=setter(ACTIONONPURGE)),
        np(PList('scratch pool'), action=setter(SCRATCHPOOL)),
        np(PList('recycle pool'), action=setter(RECYCLEPOOL)),
        np(PList('file retention'), action=setter(FILERETENTION)),
        np(PList('job retention'), action=setter(JOBRETENTION)),
        np(PList('cleaning prefix'), action=setter(CLEANINGPREFIX)),
        np(PList('label format'), action=setter(LABELFORMAT)),
    ]

    gr_line = directives[0]
    for directive in directives[1:]:
        gr_line = gr_line | directive

    # The parse actions do the work; the token list itself is not needed.
    OneOrMore(gr_line).parseString(string, parseAll=True)
    return 'Pool: ' + self[NAME]
def numeric(columns, name=None, compulsory=False):
    """
    Creates the grammar for a Numeric (N) field, accepting only the
    specified number of characters.

    This version only allows integers. The matched digits are converted
    with ``_to_int``.

    When the field is not compulsory, exactly ``columns`` blanks are also
    accepted and parsed as None.

    :param columns: number of columns for this field
    :param name: name for the field
    :param compulsory: indicates if the blank (empty) field is disallowed
    :return: grammar for the integer numeric field
    :raises ValueError: if columns is zero or negative
    """
    if name is None:
        name = 'Numeric Field'

    if columns <= 0:
        # Can't be empty or have negative size.
        # ValueError is still a BaseException, so existing handlers keep
        # working, but it no longer slips past ``except Exception``.
        raise ValueError(
            'columns must be a positive number, got %r' % (columns,))

    # Only numbers are accepted
    field = pp.Regex('[0-9]{' + str(columns) + '}')

    # Parse action
    field.setParseAction(lambda n: _to_int(n))
    field.leaveWhitespace()

    # Name
    field.setName(name)

    if not compulsory:
        empty = pp.Regex('[ ]{' + str(columns) + '}')
        empty.setParseAction(pp.replaceWith(None))
        empty.setName(name)

        # White spaces are not removed
        empty.leaveWhitespace()

        field = field | empty

        # Name
        field.setName(name)

    return field
def remove_empty_statements(string, seperator=';'):
    """Collapse repeated separators and drop the leading ones

    Every run of two or more separators is replaced by a single one, and
    separator characters at the very start of the result are stripped.

    Args:
        string(str): String to be processed
        seperator(str): Separator to be checked for duplicates

    Returns:
        result(str): String with empty statements trimmed
    """
    if string == '':
        return string

    # separator followed by one or more separators -> a single separator
    repeated = seperator + OneOrMore(seperator)
    repeated.setParseAction(replaceWith(seperator))

    collapsed = repeated.transformString(string)
    return collapsed.lstrip(seperator)
def one_of_tags(tags, canonical_tag, name=None):
    """Build the expression matching any of several equivalent tags.

    This is a convenience method for defining the tags usable in the
    :class:`BelParser`. For example, statements like g(HGNC:SNCA) can be
    expressed also as geneAbundance(HGNC:SNCA). The language must define
    multiple different tags that get normalized to the same thing.

    :param list[str] tags: a list of strings that are the tags for a function. For example, ['g', 'geneAbundance'] for the
                            abundance of a gene
    :param str canonical_tag: the preferred tag name. Does not have to be one of the tags. For example, 'GeneAbundance'
                            (note capitalization) is used for the abundance of a gene
    :param str name: this is the key under which the value for this tag is put in the PyParsing framework.
    :rtype: :class:`pyparsing.ParserElement`
    """
    # Whatever tag matched, emit the canonical one instead
    normalized = oneOf(tags)
    normalized = normalized.setParseAction(replaceWith(canonical_tag))

    if name is not None:
        return normalized.setResultsName(name)
    return normalized
def visan(name=None, compulsory=False):
    """
    Creates the grammar for a V-ISAN code.

    This is a variation on the ISAN (International Standard Audiovisual
    Number).

    The field is built from four numeric sub-fields of 8 (version),
    12 (ISAN), 4 (episode) and 1 (check digit) columns, 25 columns in
    total. The grouped sub-fields are converted with ``_to_visan``.

    When the field is not compulsory, exactly 25 blanks are accepted and
    parsed as None.

    :param name: name for the field
    :param compulsory: indicates if the empty string is disallowed
    :return: grammar for a V-ISAN field
    """
    if name is None:
        name = 'V-ISAN Field'

    version = basic.numeric(8)
    version = version.setName('Version').setResultsName('version')

    isan = basic.numeric(12)
    isan = isan.setName('ISAN').setResultsName('isan')

    episode = basic.numeric(4)
    episode = episode.setName('Episode').setResultsName('episode')

    check_digit = basic.numeric(1)
    check_digit = check_digit.setName('Check Digit').setResultsName(
        'check_digit')

    field = pp.Group(version + isan + episode + check_digit)
    field.setParseAction(lambda v: _to_visan(v[0]))
    field.setName(name)

    if not compulsory:
        # If it is not compulsory then it can be set as empty
        empty = pp.Regex('[ ]{25}')
        empty.setParseAction(pp.replaceWith(None))
        empty.setName(name)

        # The blanks are the content: without this, pyparsing skips the
        # leading whitespace first and the pattern can never match.
        empty.leaveWhitespace()

        field = empty | field

        # Name
        field.setName(name)

    return field.setResultsName('visan')
def lookup(values, columns=1, name=None, compulsory=False):
    """
    Creates the grammar for a Lookup (L) field, accepting only values from
    a list.

    The 'columns' parameter is used only when the field is optional: it is
    the number of blanks an empty field takes. An empty field is parsed as
    None.

    Like in the Alphanumeric field, the result will be stripped of all
    heading and trailing whitespaces.

    :param values: the accepted values, as taken by ``pp.oneOf``
    :param columns: number of columns, for the case this field is left empty
    :param name: name for the field
    :param compulsory: indicates if the empty string is disallowed
    :return: grammar for the lookup field
    """
    if name is None:
        name = 'Lookup Field'

    # Only the specified values are allowed
    allowed = pp.oneOf(values)
    allowed.setName(name)
    allowed.setParseAction(lambda s: s[0].strip())
    allowed.leaveWhitespace()

    if compulsory:
        return allowed

    # Optional field: exactly `columns` blanks stand for "no value"
    blank = pp.Regex('[ ]{' + str(columns) + '}')
    blank.setName(name)
    blank.leaveWhitespace()
    blank.setParseAction(pp.replaceWith(None))

    either = allowed | blank
    either.setName(name)
    either.leaveWhitespace()
    return either
def isrc(name=None, compulsory=False):
    """
    Creates the grammar for an ISRC code.

    ISRC stands for International Standard Recording Code, which is the
    standard ISO 3901. This stores information identifying a particular
    recording.

    Note that the structured grammar assembled below is currently thrown
    away: the function returns a plain 12-column alphanumeric field (see
    the TODO at the end).

    :param name: name for the field
    :param compulsory: indicates if the empty string is disallowed
    :return: grammar for an ISRC field
    """
    if name is None:
        name = 'ISRC Field'

    # Structured form: country-registrant-year-work id
    dash = pp.Literal('-')
    country = basic.alphanum(2)
    registrant = basic.alphanum(3)
    two_digits = '[0-9]{2}'
    year = pp.Regex(two_digits)
    work_id = pp.Regex(two_digits)

    structured = pp.Combine(
        country + dash + registrant + dash + year + dash + work_id)
    structured.setName(name)

    if not compulsory:
        # If it is not compulsory then it can be set as empty
        blank = pp.Regex('[ ]{12}')
        blank.setParseAction(pp.replaceWith(None))
        blank.setName(name)
        blank.leaveWhitespace()

        structured = blank | structured

        # Name
        structured.setName(name)

    # TODO: Fix this field
    # Until then the structured grammar above is not used.
    field = basic.alphanum(12, compulsory=compulsory)
    field = field.setName(name)

    return field.setResultsName('isrc')
def _or(cls, *literals, suppress=False): """ Return a MatchFirst aggregation of CaselessKeyword literals based on the supplied iterable of strings. If the supplied iterable is a dictionary, replace the keys with the values as a pyparsing parse action. If suppress is True, wrap the output in a Suppress object. :param literals: :param suppress: :return: """ if isinstance(literals[0], dict): keywords = (CaselessKeyword(l).addParseAction(replaceWith(d)) for l, d in literals[0].items()) else: keywords = (CaselessKeyword(literal) for literal in literals) match_first = MatchFirst(keywords) if suppress: return Suppress(match_first) else: return match_first
def one_of_tags(
    tags: List[str],
    canonical_tag: str,
    name: Optional[str] = None,
) -> ParserElement:
    """Build the expression matching any of several equivalent tags.

    Defines the tags usable in the :class:`BelParser`. For example,
    statements like ``g(HGNC:SNCA)`` can be expressed also as
    ``geneAbundance(HGNC:SNCA)``. The language must define multiple
    different tags that get normalized to the same thing.

    :param tags: a list of strings that are the tags for a function. For example, ['g', 'geneAbundance'] for the
     abundance of a gene
    :param canonical_tag: the preferred tag name. Does not have to be one of the tags. For example, 'GeneAbundance'
     (note capitalization) is used for the abundance of a gene
    :param name: this is the key under which the value for this tag is put in the PyParsing framework.
    """
    # Whatever tag matched, emit the canonical one instead
    normalized = oneOf(tags)
    normalized = normalized.setParseAction(replaceWith(canonical_tag))

    if name is not None:
        return normalized.setResultsName(name)
    return normalized
def parse_string(self, string):
    '''Populate this object from the text of a Fileset resource.

    Recognised top-level items are the ``name = ...`` directive (calls
    ``self._set_name``), ``include { ... }`` and ``exclude { ... }``
    blocks (handed to ``self._parse_add_entry``) and the two yes/no
    directives 'Ignore File Set Changes' and 'Enable VSS' (stored
    through ``self._parse_setter``). The whole string must be consumed
    (``parseAll=True``).

    Parsing is delegated to the pyparsing library.

    :param string: the resource text to parse
    :return: ``'Fileset: '`` followed by the parsed name
    '''
    from pyparsing import (Regex, quotedString, restOfLine, Keyword,
                           nestedExpr, Group, OneOrMore, Word, Literal,
                           alphanums, removeQuotes, replaceWith)

    gr_eq = Literal('=')
    gr_stripped_string = quotedString.copy().setParseAction(removeQuotes)
    gr_opt_quoted_string = gr_stripped_string | restOfLine

    gr_name = Keyword('name', caseless=True) + gr_eq + gr_opt_quoted_string
    # Single-argument action so pyparsing can only pass the tokens. The
    # former ``lambda x, y=self`` accepted two positionals, which lets
    # pyparsing's arity detection call it as (loc, tokens) and overwrite
    # ``y``.
    gr_name.setParseAction(lambda x: self._set_name(x[2]))

    gr_yn = (Keyword('yes', caseless=True).setParseAction(replaceWith('1'))
             | Keyword('no', caseless=True).setParseAction(replaceWith('0')))
    # "<words or quoted strings> = <value>" inside include/exclude blocks
    gr_phrase = Group(OneOrMore(gr_stripped_string | Word(alphanums))
                      + gr_eq + gr_opt_quoted_string)

    def np(words, fn=gr_opt_quoted_string, action=print):
        # "<any spelling in words> = <fn>", with `action` as parse action
        p = Keyword(words[0], caseless=True)
        for w in words[1:]:
            p = p | Keyword(w, caseless=True)
        p = p + gr_eq + fn
        p.setParseAction(action)
        return p

    gr_ifsc = np(PList('Ignore File Set Changes'), gr_yn,
                 action=self._parse_setter(IGNORECHANGES))
    gr_evss = np(PList('Enable VSS'), gr_yn,
                 action=self._parse_setter(VSSENABLED))

    # Raw string: '\}' is not a valid escape in a normal string literal
    gr_i_option = Group(Keyword(OPTIONS, caseless=True)
                        + nestedExpr('{', '}',
                                     Regex(r'[^\}]+', re.MULTILINE)))
    gr_e_option = gr_i_option.copy()
    gr_i_file = gr_phrase.copy()
    gr_e_file = gr_phrase.copy()

    gr_inc = Keyword('include', caseless=True) + nestedExpr(
        '{', '}', OneOrMore(gr_i_option | gr_i_file))
    gr_inc.addParseAction(self._parse_add_entry)
    gr_exc = Keyword('exclude', caseless=True) + nestedExpr(
        '{', '}', OneOrMore(gr_e_option | gr_e_file))
    gr_exc.addParseAction(self._parse_add_entry)

    gr_res = OneOrMore(gr_name | gr_inc | gr_exc | gr_ifsc | gr_evss)

    # The parse actions do the work; the token list itself is not needed.
    gr_res.parseString(string, parseAll=True)
    return 'Fileset: ' + self[NAME]
def audio_visual_key(name=None, compulsory=False):
    """
    Creates the grammar for an Audio Visual Key code.

    The field is built from a 3-column numeric society code followed by a
    15-column alphanumeric audio-visual number, 18 columns in total. The
    grouped sub-fields are converted with ``_to_avi``.

    When the field is not compulsory, exactly 18 blanks are accepted and
    parsed as None.

    :param name: name for the field
    :param compulsory: indicates if the empty string is disallowed
    :return: grammar for an Audio Visual Key field
    """
    if name is None:
        name = 'AVI Field'

    society_code = basic.numeric(3)
    society_code = society_code.setName('Society Code').setResultsName(
        'society_code')

    av_number = basic.alphanum(15)
    av_number = av_number.setName('Audio-Visual Number').setResultsName(
        'av_number')

    field = pp.Group(society_code + av_number)
    field.setParseAction(lambda v: _to_avi(v[0]))
    field = field.setName(name)

    if not compulsory:
        # If it is not compulsory then it can be set as empty
        empty = pp.Regex('[ ]{18}')
        empty.setParseAction(pp.replaceWith(None))
        empty.setName(name)

        # The blanks are the content: without this, pyparsing skips the
        # leading whitespace first and the pattern can never match.
        empty.leaveWhitespace()

        field = empty | field

        # Name
        field.setName(name)

    return field.setResultsName('audio_visual_key')
def _wrap_as_optional(self, field, name, columns):
    """
    Wraps the field so that it also accepts an empty (blank) value.

    The blank value must be exactly as many whitespaces as the columns
    parameter; one fewer or one more will be rejected. A blank field is
    parsed as None.

    The wrapper keeps the results name of the wrapped field.

    :param field: the field to wrap
    :param name: name of the field
    :param columns: number of columns it takes
    :return: the field with an additional rule to allow empty strings
    """
    results_name = field.resultsName

    # Exactly `columns` blanks, kept as content and turned into None
    blank = pp.Regex('[ ]{' + str(columns) + '}')
    blank.setName(name)
    blank.leaveWhitespace()
    blank.setParseAction(pp.replaceWith(None))
    blank = blank.setResultsName(results_name)

    optional_field = field | blank
    optional_field.setName(name)
    optional_field = optional_field.setResultsName(results_name)
    optional_field.leaveWhitespace()

    return optional_field
def flag(name=None, compulsory=False):
    """
    Creates the grammar for a Flag (F) field, accepting only 'Y', 'N' or
    'U'.

    The matched character is converted with ``_to_flag``. When the field
    is not compulsory a single blank is accepted too and parsed as None.

    :param name: name for the field
    :param compulsory: indicates if the empty flag is disallowed
    :return: grammar for the flag field
    """
    if name is None:
        name = 'Flag Field'

    # Basic field: exactly one of the three flag characters
    flag_field = pp.Combine(pp.Word('YNU', exact=1))
    flag_field.setParseAction(lambda f: _to_flag(f[0]))
    flag_field.setName(name)

    if not compulsory:
        # If it is not compulsory the empty flag (one blank) is accepted
        blank = pp.Literal(' ')
        blank.setParseAction(pp.replaceWith(None))
        blank.setName(name)

        flag_field = flag_field | blank
        flag_field.setName(name)

    # Blanks are meaningful in this field, so they must not be skipped
    flag_field.leaveWhitespace()

    return flag_field
def _wrap_as_optional_numeric(self, field, name, columns):
    """
    Wraps the field so that a run of zeros is accepted as an empty value.

    The empty value must be exactly as many '0' characters as the columns
    parameter, and it is parsed as None. The wrapped field is tried first,
    the all-zeros rule second.

    The wrapper keeps the results name of the wrapped field.

    :param field: the field to wrap
    :param name: name of the field
    :param columns: number of columns it takes
    :return: the field with an additional rule to allow all-zero values
    """
    results_name = field.resultsName

    # Exactly `columns` zeros, turned into None
    zeros = pp.Regex('[0]{' + str(columns) + '}')
    zeros.setName(name)
    zeros.leaveWhitespace()
    zeros.setParseAction(pp.replaceWith(None))
    zeros = zeros.setResultsName(results_name)

    optional_field = field | zeros
    optional_field.setName(name)
    optional_field = optional_field.setResultsName(results_name)
    optional_field.leaveWhitespace()

    return optional_field
def _parse_ios_interfaces(data, acls_as_list=True, auto_cleanup=True):
    """
    Walks through a IOS interface config and returns a dict of parts.

    Intended for use by trigger.cmds.NetACLInfo.ios_parse() but was written
    to be portable.

    Parsing is best-effort: if the grammar raises, an empty dict is used as
    the result instead of propagating the error.

    @auto_cleanup: Set to False if you don't want to pass results through
    cleanup_results(). Enabled by default.

    @acls_as_list: Set to False if you want acl names as strings instead of
    list members. (e.g. "ABC123" vs. ['ABC123'])
    """
    import pyparsing as pp

    # Setup
    bang = pp.Literal("!").suppress()
    anychar = pp.Word(pp.printables)

    # foundry example:
    # telnet@olse1-dc5#show configuration | include ^(interface | ip address | ip access-group | description|!)
    # !
    # Startup-config data location is flash memory
    # !
    # Startup configuration:
    # !
    # ver 07.5.05hT53
    # !
    # module 1 bi-0-port-m4-management-module
    # module 2 bi-8-port-gig-module

    # there is a lot more that foundry is including in the output that
    # should be ignored, so everything up to the next "interface" keyword
    # is skipped
    interface_keyword = pp.Keyword("interface")
    unwanted = pp.SkipTo(interface_keyword, include=False).suppress()

    octet = pp.Word(pp.nums, max=3)
    ipaddr = pp.Combine(octet + "." + octet + "." + octet + "." + octet)
    address = ipaddr
    netmask = ipaddr
    cidr = pp.Literal("/").suppress() + pp.Word(pp.nums, max=2)

    # Description
    desc_keyword = pp.Keyword("description")
    description = pp.Dict(pp.Group(desc_keyword + pp.Group(pp.restOfLine)))

    # Addresses
    # cisco example:
    #  ip address 172.29.188.27 255.255.255.224 secondary
    #
    # foundry example:
    #  ip address 10.62.161.187/26
    ipaddr_keyword = pp.Keyword("ip address").suppress()
    secondary = pp.Literal("secondary").suppress()

    # foundry matches on cidr and cisco matches on netmask
    # netmask converted to cidr in cleanup
    ip_tuple = pp.Group(address + (cidr ^ netmask)).setResultsName(
        'addr', listAllMatches=True)
    ip_address = ipaddr_keyword + ip_tuple + pp.Optional(secondary)
    addrs = pp.ZeroOrMore(ip_address)

    # ACLs
    acl_keyword = pp.Keyword("ip access-group").suppress()

    # acl_name to be [''] or '' depending on acls_as_list
    acl_name = pp.Group(anychar) if acls_as_list else anychar
    direction = pp.oneOf('in out').suppress()
    # Look ahead at the direction to pick the dict key for this ACL
    acl_in = acl_keyword + pp.FollowedBy(acl_name + pp.Literal('in'))
    acl_in.setParseAction(pp.replaceWith('acl_in'))
    acl_out = acl_keyword + pp.FollowedBy(acl_name + pp.Literal('out'))
    acl_out.setParseAction(pp.replaceWith('acl_out'))

    acl = pp.Dict(pp.Group((acl_in ^ acl_out) + acl_name)) + direction
    acls = pp.ZeroOrMore(acl)

    # Interfaces
    iface_keyword = pp.Keyword("interface").suppress()
    foundry_awesome = pp.Literal(" ").suppress() + anychar

    # foundry example:
    # !
    # interface ethernet 6/6
    #  ip access-group 126 in
    #  ip address 172.18.48.187 255.255.255.255

    # cisco example:
    # !
    # interface Port-channel1
    #  description gear1-mtc : AE1 : iwslbfa1-mtc-sw0 :  : 1x1000 : 172.20.166.0/24 :  :  :
    #  ip address 172.20.166.251 255.255.255.0

    interface = pp.Combine(anychar + pp.Optional(foundry_awesome))

    # foundry's body is acl then ip and cisco's is ip then acl
    iface_body = (pp.Optional(description) + pp.Optional(acls)
                  + pp.Optional(addrs) + pp.Optional(acls))

    iface_info = (pp.Optional(unwanted) + iface_keyword
                  + pp.Dict(pp.Group(interface + iface_body))
                  + pp.SkipTo(bang))

    interfaces = pp.Dict(pp.ZeroOrMore(iface_info))

    # And results!
    # this is where the parsing is actually happening
    try:
        results = interfaces.parseString(data)
    except Exception:
        # Best effort: any parsing failure yields an empty result.
        # Deliberately not a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still propagate.
        results = dict()

    return cleanup_interface_results(results) if auto_cleanup else results
def make_keyword(kwd_str, kwd_value):
    """Return a keyword matching ``kwd_str`` whose token is replaced by ``kwd_value``."""
    keyword = pp.Keyword(kwd_str)
    keyword.setParseAction(pp.replaceWith(kwd_value))
    return keyword
def __init__(self, user_message, user_timezone):
    """RemindMe Class Initialization

    Stores the message and timezone, registers the raw regex
    specifications in ``self.regexes`` and builds the pyparsing time
    grammar as instance attributes.

    :param user_message: the raw message text
    :param user_timezone: timezone name, passed to ``timezone()``
    """
    self.usr_msg = user_message
    self.usr_tz = timezone(user_timezone)
    self.suffix = ''

    def spec(pattern, reminder, time, add):
        # One regex entry; 'reminder' and 'time' are presumably capture
        # group indices -- confirm against the code that consumes them.
        return {'regex': pattern, 'reminder': reminder,
                'time': time, 'add': add}

    # raw regex compilations list
    self.regexes = [
        # remind me to (take the trash out) (tomorrow) at 5
        spec(r'^([rR]emind me to )?([\s\S]+?) ((([sS]un|[mM]on|([tT](ues|hurs))|[fF]ri)(day|\.)?|[wW]ed(\.|nesday)?|[sS]at(\.|urday)?|[tT]((ue?)|(hu?r?))\.?)( (at )?(\w+)?)?|([tT]omorrow|[tT]oday)( (at )?(\w+)?)?|(\d+|\w+) \w+ from now)$',
             1, 19, ''),
        # remind me to (take out the trash) in (3 hours)
        spec(r'^([rR]emind me to )?([\s\S]+?) in ([\s\S]+)$',
             1, 2, 'from now'),
        # (tomorrow) at (5), remind me to (take out the trash)
        spec(r'^((([sS]un|[mM]on|([tT](ues|hurs))|[fF]ri)(day|\.)?|[wW]ed(\.|nesday)?|[sS]at(\.|urday)?|[tT]((ue?)|(hu?r?))\.?)( (at )?(\w+)?)?|([Tt]omorrow|[Tt]oday)( (at )?(\w+)?)?|(\d+|\w+) \w+ from now),? (remind me to )?([\s\S]+?)$',
             20, 17, ''),
        # In 5 hours remind me to take out the trash
        spec(r'^[iI]n ((\d+|\w+) \w+),? (remind me to )?([\s\S]+?)$',
             3, 0, 'from now'),
    ]

    self.CL = CaselessLiteral

    # grammar definitions
    (self.today, self.tomorrow, self.yesterday,
     self.noon, self.midnight, self.now) = map(
        self.CL, "today tomorrow yesterday noon midnight now".split())

    # A unit name with an optional trailing "s"
    self.plural = lambda s: Combine(self.CL(s) + Optional(self.CL("s")))
    (self.week, self.day, self.hour,
     self.minute, self.second) = map(
        self.plural, "week day hour minute second".split())

    self.am = self.CL("am")
    self.pm = self.CL("pm")
    self.COLON = Suppress(':')

    # are these actually operators?
    # Direction words are replaced by a sign: +1 or -1
    self.in_ = self.CL("in").setParseAction(replaceWith(1))
    self.from_ = self.CL("from").setParseAction(replaceWith(1))
    self.before = self.CL("before").setParseAction(replaceWith(-1))
    self.after = self.CL("after").setParseAction(replaceWith(1))
    self.ago = self.CL("ago").setParseAction(replaceWith(-1))
    self.next_ = self.CL("next").setParseAction(replaceWith(1))
    self.last_ = self.CL("last").setParseAction(replaceWith(-1))

    # Quantities: "[a] couple [of]" -> 2, "a" -> 1, digits -> int
    self.couple = (
        Optional(self.CL("a")) + self.CL("couple") + Optional(self.CL("of"))
    ).setParseAction(replaceWith(2))
    self.a_qty = self.CL("a").setParseAction(replaceWith(1))
    self.integer = Word(pyparsing.nums).setParseAction(lambda t:int(t[0]))
    # Four digits split into [first two, last two]
    self.int4 = Group(
        Word(pyparsing.nums,exact=4).setParseAction(
            lambda t: [int(t[0][:2]),int(t[0][2:])]))
    self.qty = self.integer | self.couple | self.a_qty

    self.dayName = oneOf( list(calendar.day_name) )

    self.dayOffset = (
        self.qty("qty") + (self.week | self.day)("timeunit"))
    self.dayFwdBack = (
        self.from_ + self.now.suppress() | self.ago)("dir")
    self.weekdayRef = (
        Optional(self.next_ | self.last_,1)("dir") + self.dayName("day"))
    self.dayRef = (
        Optional(
            (self.dayOffset
             + (self.before | self.after | self.from_)("dir")
             ).setParseAction(self.convert_to_timedelta))
        + ((self.yesterday | self.today | self.tomorrow)("name")
           | self.weekdayRef("wkdayRef")
           ).setParseAction(self.convert_to_day))
    self.todayRef = (
        (self.dayOffset + self.dayFwdBack
         ).setParseAction(self.convert_to_timedelta)
        | (self.in_("dir") + self.qty("qty") + self.day("timeunit")
           ).setParseAction(self.convert_to_timedelta))

    self.dayTimeSpec = self.dayRef | self.todayRef
    self.dayTimeSpec.setParseAction(self.calculate_time)

    self.hourMinuteOrSecond = (self.hour | self.minute | self.second)

    self.timespec = Group(
        self.int4("miltime")
        | self.integer("HH")
        + Optional(self.COLON + self.integer("MM"))
        + Optional(self.COLON + self.integer("SS"))
        + (self.am | self.pm)("ampm"))

    self.absTimeSpec = (
        (self.noon | self.midnight | self.now
         | self.timespec("timeparts"))("timeOfDay")
        + Optional(self.dayRef)("dayRef"))
    self.absTimeSpec.setParseAction(
        self.convert_to_abs_time,self.calculate_time)

    self.relTimeSpec = (
        self.qty("qty") + self.hourMinuteOrSecond("timeunit")
        + (self.from_ | self.before | self.after)("dir")
        + self.absTimeSpec("absTime")
        | self.qty("qty") + self.hourMinuteOrSecond("timeunit")
        + self.ago("dir")
        | self.in_ + self.qty("qty") + self.hourMinuteOrSecond("timeunit"))
    self.relTimeSpec.setParseAction(
        self.convert_to_timedelta,self.calculate_time)
def parse(cls, content, basedir=None, resolve=True, unresolved_value=DEFAULT_SUBSTITUTION):
    """Parse a HOCON content.

    :param content: HOCON content to parse
    :type content: basestring
    :param basedir: directory that relative ``include`` file paths are joined
        onto; when ``None`` the include path is used as written
    :type basedir: basestring
    :param resolve: if true, resolve substitutions
    :type resolve: boolean
    :param unresolved_value: assigned value to unresolved substitution.
        If overridden with a default value, it will replace all unresolved
        values by the default value. If it is set to pyhocon.STR_SUBSTITUTION
        then it will replace the value by its substitution expression
        (e.g., ${x})
    :type unresolved_value: one of the ``*_SUBSTITUTION`` sentinels or a
        replacement value
    :return: a ConfigTree or a list
    """
    unescape_pattern = re.compile(r'\\.')

    def replace_escape_sequence(match):
        # Unknown escape sequences are left untouched.
        value = match.group(0)
        return cls.REPLACEMENTS.get(value, value)

    def norm_string(value):
        return unescape_pattern.sub(replace_escape_sequence, value)

    def unescape_string(tokens):
        return ConfigUnquotedString(norm_string(tokens[0]))

    def parse_multi_string(tokens):
        # remove the first and last 3 "
        return tokens[0][3:-3]

    def convert_number(tokens):
        n = tokens[0]
        try:
            return int(n, 10)
        except ValueError:
            return float(n)

    def convert_period(tokens):
        period_value = int(tokens.value)
        period_identifier = tokens.unit
        # Map the matched unit string back to its canonical period unit.
        period_unit = next(
            single_unit
            for single_unit, values in cls.get_supported_period_type_map().items()
            if period_identifier in values
        )
        return period(period_value, period_unit)

    # ${path} or ${?path} for optional substitution
    SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

    def create_substitution(instring, loc, token):
        # remove the ${ and }
        match = re.match(SUBSTITUTION_PATTERN, token[0])
        variable = match.group('variable')
        ws = match.group('ws')
        optional = match.group('optional') == '?'
        return ConfigSubstitution(variable, optional, ws, instring, loc)

    # "value" followed by optional trailing blanks
    STRING_PATTERN = '"(?P<value>(?:[^"\\\\]|\\\\.)*)"(?P<ws>[ \t]*)'

    def create_quoted_string(instring, loc, token):
        # strip the surrounding quotes and keep the trailing whitespace apart
        match = re.match(STRING_PATTERN, token[0])
        value = norm_string(match.group('value'))
        ws = match.group('ws')
        return ConfigQuotedString(value, ws, instring, loc)

    def include_config(instring, loc, token):
        url = None
        file_path = None
        required = False

        if token[0] == 'required':
            required = True
            final_tokens = token[1:]
        else:
            final_tokens = token

        if len(final_tokens) == 1:  # include "test"
            first = final_tokens[0]
            value = first.value if isinstance(first, ConfigQuotedString) else first
            if value.startswith(("http://", "https://", "file://")):
                url = value
            else:
                file_path = value
        elif len(final_tokens) == 2:  # include url("test") or file("test")
            second = final_tokens[1]
            value = second.value if isinstance(second, ConfigQuotedString) else second
            if final_tokens[0] == 'url':
                url = value
            elif final_tokens[0] == 'package':
                file_path = cls.resolve_package_path(value)
            else:
                file_path = value

        if url is not None:
            logger.debug('Loading config from url %s', url)
            obj = ConfigFactory.parse_URL(
                url,
                resolve=False,
                required=required,
                unresolved_value=NO_SUBSTITUTION)
        elif file_path is not None:
            path = file_path if basedir is None else os.path.join(basedir, file_path)

            def _make_prefix(path):
                return ('<root>' if path is None else '[%s]' % path).ljust(55).replace('\\', '/')

            _prefix = _make_prefix(path)

            def _load(path):
                _prefix = _make_prefix(path)
                logger.debug('%s Loading config from file %r', _prefix, path)
                obj = ConfigFactory.parse_file(
                    path,
                    resolve=False,
                    required=required,
                    unresolved_value=NO_SUBSTITUTION)
                logger.debug('%s Result: %s', _prefix, obj)
                return obj

            if '*' in path or '?' in path:
                paths = glob(path, recursive=True)
                obj = None

                def _merge(a, b):
                    if a is None or b is None:
                        return a or b
                    elif isinstance(a, ConfigTree) and isinstance(b, ConfigTree):
                        return ConfigTree.merge_configs(a, b)
                    elif isinstance(a, list) and isinstance(b, list):
                        return a + b
                    else:
                        # Format the message here: the fields used to be
                        # passed as keyword arguments to the exception.
                        raise ConfigException(
                            'Unable to make such include (merging unexpected types: {a} and {b}'.format(
                                a=type(a), b=type(b)))

                logger.debug('%s Loading following configs: %s', _prefix, paths)
                for p in paths:
                    obj = _merge(obj, _load(p))
                logger.debug('%s Result: %s', _prefix, obj)
                # NOTE(review): when the glob matches nothing, obj is still
                # None here and the final obj.items() below fails -- confirm
                # whether an empty include or an error is the wanted outcome.
            else:
                logger.debug('%s Loading single config: %s', _prefix, path)
                obj = _load(path)
        else:
            raise ConfigException(
                'No file or URL specified at: {loc}: {instring}'.format(
                    loc=loc, instring=instring))

        return ConfigInclude(obj if isinstance(obj, list) else obj.items())

    @contextlib.contextmanager
    def set_default_white_spaces():
        # pyparsing's default whitespace is process-wide state, so it must be
        # restored even when building or running the grammar raises.
        default = ParserElement.DEFAULT_WHITE_CHARS
        ParserElement.setDefaultWhitespaceChars(' \t')
        try:
            yield
        finally:
            ParserElement.setDefaultWhitespaceChars(default)

    with set_default_white_spaces():
        assign_expr = Forward()
        true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
        false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
        null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(NoneValue()))
        key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- /')

        eol = Word('\n\r').suppress()
        eol_comma = Word('\n\r,').suppress()
        comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd())
        comment_eol = Suppress(Optional(eol_comma) + comment)
        comment_no_comma_eol = (comment | eol).suppress()
        number_expr = Regex(
            r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
            re.DOTALL).setParseAction(convert_number)

        # Flatten the list of lists with unit strings.
        period_types = list(
            itertools.chain(*cls.get_supported_period_type_map().values()))
        # `Or()` tries to match the longest expression if more expressions
        # are matching. We employ this to match e.g.: 'weeks' so that we
        # don't end up with 'w' and 'eeks'. Note that 'weeks' but also 'w'
        # are valid unit identifiers.
        # Allow only spaces as a valid separator between value and unit.
        # E.g. \t as a separator is invalid: '10<TAB>weeks'.
        period_expr = (
            Word(nums)('value')
            + ZeroOrMore(White(ws=' ')).suppress()
            + Or(period_types)('unit')
            + WordEnd(alphanums).suppress()
        ).setParseAction(convert_period)

        # multi line string using """
        # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
        multiline_string = Regex(
            '""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
        # single quoted line string
        quoted_string = Regex(
            r'"(?:[^"\\\n]|\\.)*"[ \t]*', re.UNICODE).setParseAction(create_quoted_string)
        # unquoted string that takes the rest of the line until an optional comment
        # we support .properties multiline support which is like this:
        # line1 \
        # line2 \
        # so a backslash precedes the \n
        unquoted_string = Regex(
            r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*', re.UNICODE).setParseAction(unescape_string)
        substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
        string_expr = multiline_string | quoted_string | unquoted_string

        value_expr = period_expr | number_expr | true_expr | false_expr | null_expr | string_expr

        include_content = (
            quoted_string
            | ((Keyword('url') | Keyword('file') | Keyword('package'))
               - Literal('(').suppress() - quoted_string - Literal(')').suppress())
        )
        include_expr = (
            Keyword("include", caseless=True).suppress()
            + (include_content
               | (Keyword("required") - Literal('(').suppress() - include_content - Literal(')').suppress()))
        ).setParseAction(include_config)

        root_dict_expr = Forward()
        dict_expr = Forward()
        list_expr = Forward()
        multi_value_expr = ZeroOrMore(
            comment_eol | include_expr | substitution_expr | dict_expr | list_expr | value_expr
            | (Literal('\\') - eol).suppress())
        # for a dictionary : or = is optional
        # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
        inside_dict_expr = ConfigTreeParser(
            ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
        inside_root_dict_expr = ConfigTreeParser(
            ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma), root=True)
        dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
        root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress('}')
        list_entry = ConcatenatedValueParser(multi_value_expr)
        list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

        # special case when we have a value assignment where the string can potentially be the remainder of the line
        assign_expr << Group(
            key - ZeroOrMore(comment_no_comma_eol) - (
                dict_expr
                | (Literal('=') | Literal(':') | Literal('+='))
                - ZeroOrMore(comment_no_comma_eol)
                - ConcatenatedValueParser(multi_value_expr)))

        # the file can be { ... } where {} can be omitted or []
        config_expr = (
            ZeroOrMore(comment_eol | eol)
            + (list_expr | root_dict_expr | inside_root_dict_expr)
            + ZeroOrMore(comment_eol | eol_comma)
        )
        config = config_expr.parseString(content, parseAll=True)[0]

        if resolve:
            allow_unresolved = (unresolved_value is not DEFAULT_SUBSTITUTION
                                and unresolved_value is not MANDATORY_SUBSTITUTION)
            has_unresolved = cls.resolve_substitutions(config, allow_unresolved)
            if has_unresolved and unresolved_value is MANDATORY_SUBSTITUTION:
                raise ConfigSubstitutionException(
                    'resolve cannot be set to True and unresolved_value to MANDATORY_SUBSTITUTION')

        if unresolved_value is not NO_SUBSTITUTION and unresolved_value is not DEFAULT_SUBSTITUTION:
            cls.unresolve_substitutions_to_value(config, unresolved_value)
    return config
sha1 = file_hash_word_start + Word( hexnums, exact=40).setParseAction(downcaseTokens) + alphanum_word_end sha256 = file_hash_word_start + Word( hexnums, exact=64).setParseAction(downcaseTokens) + alphanum_word_end authentihash = Combine( Or(['authentihash']) + Optional(Word(printables, excludeChars=alphanums)) + sha256('hash'), joinString=' ', adjacent=False, ) sha512 = file_hash_word_start + Word( hexnums, exact=128).setParseAction(downcaseTokens) + alphanum_word_end year = Word('12') + Word(nums, exact=3) cve = (alphanum_word_start + Combine( Or(['cve', 'CVE']).setParseAction(replaceWith('CVE')) + Word('- ').setParseAction(replaceWith('-')) + year('year') + Word('-') + Word(nums, min=4)('cve_id')) + alphanum_word_end) asn = (alphanum_word_start + Combine( Or(['as', 'AS']).setParseAction(replaceWith('AS')) + Optional(Word('nN ')).setParseAction(replaceWith('N')) + Word(nums)('as_number')) + alphanum_word_end) # todo: implement ipv6 cidr ranges ipv4_cidr = (alphanum_word_start + Combine( ipv4_address('cidr_address') + '/' + Word(nums, max=2)('cidr_bit_range')) + alphanum_word_end) root_key_list = [ 'HKEY_LOCAL_MACHINE',
# Punctuation is matched but kept out of the parse results.
lparen, rparen, lbrack, rbrack, lbrace, rbrace, colon, comma = [
    pp.Suppress(symbol) for symbol in "()[]{}:,"
]

# Numeric literals; conversion is delegated to the cvt* parse actions.
integer = pp.Regex(r"[+-]?\d+")
integer.setName("integer")
integer.setParseAction(cvtInt)

real = pp.Regex(r"[+-]?\d+\.\d*([Ee][+-]?\d+)?")
real.setName("real")
real.setParseAction(cvtReal)

# The container expressions refer to each other, so declare them up front.
tupleStr = pp.Forward()
listStr = pp.Forward()
dictStr = pp.Forward()

# String literals: drop the u-prefix and/or the surrounding quote characters.
unistr = pp.unicodeString()
unistr.setParseAction(lambda toks: toks[0][2:-1])
quoted_str = pp.quotedString()
quoted_str.setParseAction(lambda toks: toks[0][1:-1])

boolLiteral = pp.oneOf("True False", asKeyword=True)
boolLiteral.setParseAction(cvtBool)
noneLiteral = pp.Keyword("None")
noneLiteral.setParseAction(pp.replaceWith(None))

# Order matters: `real` has to be tried before `integer`.
listItem = (
    real
    | integer
    | quoted_str
    | unistr
    | boolLiteral
    | noneLiteral
    | pp.Group(listStr)
    | tupleStr
    | dictStr
)

# A tuple is a parenthesised, comma-separated item list with an optional
# trailing comma.
tupleStr <<= (
    lparen
    + pp.Optional(pp.delimitedList(listItem))
    + pp.Optional(comma)
    + rparen
)
tupleStr.setParseAction(cvtTuple)
def _parse_ios_interfaces(data, acls_as_list=True, auto_cleanup=True, skip_disabled=True):
    """
    Walks through a IOS interface config and returns a dict of parts.

    Intended for use by `~trigger.cmds.NetACLInfo.ios_parse()` but was written
    to be portable.

    :param data:
        The interface configuration text to parse.

    :param acls_as_list:
        Whether you want acl names as list members instead of plain strings,
        e.g. ['ABC123'] vs. "ABC123". (Default: ``True``)

    :param auto_cleanup:
        Whether you want to pass results through
        ``_cleanup_interface_results()``. (Default: ``True``)

    :param skip_disabled:
        Whether to skip disabled interfaces; only forwarded to the cleanup
        step. (Default: ``True``)

    :returns:
        The cleaned-up results when ``auto_cleanup`` is set, otherwise the raw
        pyparsing results. If parsing fails, an empty dict takes the place of
        the parse results.
    """
    import pyparsing as pp

    # Setup
    bang = pp.Literal("!").suppress()
    anychar = pp.Word(pp.printables)

    #foundry example:
    #telnet@olse1-dc5#show configuration | include ^(interface | ip address | ip access-group | description|!)
    #!
    #Startup-config data location is flash memory
    #!
    #Startup configuration:
    #!
    #ver 07.5.05hT53
    #!
    #module 1 bi-0-port-m4-management-module
    #module 2 bi-8-port-gig-module

    #there is a lot more that foundry is including in the output that should
    #be ignored, so skip everything up to the next "interface" keyword
    interface_keyword = pp.Keyword("interface")
    unwanted = pp.SkipTo(interface_keyword, include=False).suppress()

    octet = pp.Word(pp.nums, max=3)
    ipaddr = pp.Combine(octet + "." + octet + "." + octet + "." + octet)
    address = ipaddr
    netmask = ipaddr
    cidr = pp.Literal("/").suppress() + pp.Word(pp.nums, max=2)

    # Description
    desc_keyword = pp.Keyword("description")
    description = pp.Dict(pp.Group(desc_keyword + pp.Group(pp.restOfLine)))

    # Addresses
    #cisco example:
    # ip address 172.29.188.27 255.255.255.224 secondary
    #
    #foundry example:
    # ip address 10.62.161.187/26
    ipaddr_keyword = pp.Keyword("ip address").suppress()
    secondary = pp.Literal("secondary").suppress()

    #foundry matches on cidr and cisco matches on netmask
    #netmask converted to cidr in cleanup
    ip_tuple = pp.Group(address + (cidr ^ netmask)).setResultsName(
        'addr', listAllMatches=True)
    negotiated = pp.Literal('negotiated')  # Seen on Cisco 886
    ip_address = ipaddr_keyword + (negotiated ^ ip_tuple) + pp.Optional(secondary)

    addrs = pp.ZeroOrMore(ip_address)

    # ACLs
    acl_keyword = pp.Keyword("ip access-group").suppress()

    # acl_name to be [''] or '' depending on acls_as_list
    acl_name = pp.Group(anychar) if acls_as_list else anychar
    direction = pp.oneOf('in out').suppress()
    # Look ahead at the direction so the dict key becomes acl_in / acl_out.
    acl_in = acl_keyword + pp.FollowedBy(acl_name + pp.Literal('in'))
    acl_in.setParseAction(pp.replaceWith('acl_in'))
    acl_out = acl_keyword + pp.FollowedBy(acl_name + pp.Literal('out'))
    acl_out.setParseAction(pp.replaceWith('acl_out'))

    acl = pp.Dict(pp.Group((acl_in ^ acl_out) + acl_name)) + direction
    acls = pp.ZeroOrMore(acl)

    # Interfaces
    iface_keyword = pp.Keyword("interface").suppress()
    foundry_awesome = pp.Literal(" ").suppress() + anychar
    #foundry example:
    #!
    #interface ethernet 6/6
    # ip access-group 126 in
    # ip address 172.18.48.187 255.255.255.255

    #cisco example:
    #!
    #interface Port-channel1
    # description gear1-mtc : AE1 : iwslbfa1-mtc-sw0 :  : 1x1000 : 172.20.166.0/24 :  :  :
    # ip address 172.20.166.251 255.255.255.0

    interface = pp.Combine(anychar + pp.Optional(foundry_awesome))

    #foundry's body is acl then ip and cisco's is ip then acl
    iface_body = (pp.Optional(description) + pp.Optional(acls)
                  + pp.Optional(addrs) + pp.Optional(acls))

    iface_info = (pp.Optional(unwanted) + iface_keyword
                  + pp.Dict(pp.Group(interface + iface_body))
                  + pp.Optional(pp.SkipTo(bang)))

    interfaces = pp.Dict(pp.ZeroOrMore(iface_info))

    # This is where the parsing is actually happening
    try:
        results = interfaces.parseString(data)
    except Exception:
        # Best effort: unparseable input yields no interfaces. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate.
        results = {}

    if auto_cleanup:
        return _cleanup_interface_results(results, skip_disabled=skip_disabled)
    return results
def insertResult(v):
    """
    Parser helper: build an expression that consumes no input and
    contributes ``v`` to the list of values returned.
    """
    placeholder = Empty()
    placeholder.setParseAction(replaceWith(v))
    return placeholder
def _validate_func_name(str, loc, tokens):
    """Placeholder validator for function names; currently accepts anything.

    NOTE(review): the (str, loc, tokens) signature matches a pyparsing parse
    action, but nothing in the visible grammar attaches it -- confirm intent.
    """
    pass


# =========== variable names rules ====================
# An indexer is a bracketed quoted string or integer, e.g. ["key"] or [3].
# `pp.nums` is only the string of digit characters; it has to be wrapped in
# Word() to match a number rather than the literal text "0123456789".
var_indexer = pp.nestedExpr("[", "]", pp.quotedString | pp.Word(pp.nums))
var_name = pp.Word(pp.alphas, pp.alphanums + ".")
var_name.setParseAction(_validate_var_name).setName("var_name")

# =========== function rules ====================
# Comma-separated argument list, defined recursively.
func_args = pp.Forward()
func_args << (
    (var_name | pp.Word(pp.alphanums) | pp.quotedString)
    + pp.Optional(pp.Literal(",") + func_args)
)
func_name = var_name + pp.Literal("(") + pp.Optional(func_args) + pp.Literal(")")
func_name.setName("func_name")

# =========== elementary query rules ====================
bin_oper = pp.oneOf("== != <> <= < >= >")
# Every spelling of a logical operator is normalised to "and" / "or".
logic_oper = (
    pp.CaselessLiteral("AND").setParseAction(pp.replaceWith("and"))
    | pp.CaselessLiteral("OR").setParseAction(pp.replaceWith("or"))
    | pp.Literal("&&").setParseAction(pp.replaceWith("and"))
    | pp.Literal("||").setParseAction(pp.replaceWith("or"))
)
query_var = var_name
query_val = pp.Word(pp.alphanums) | pp.quotedString

# =========== query rules ====================
query_expr_elem = query_var + bin_oper + query_val
query_expr_bin = query_expr_elem | pp.nestedExpr(content=query_expr_elem)
query_expr_logic = pp.Forward()
query_expr_logic << (
    query_expr_bin
    + pp.Optional(logic_oper + query_expr_bin)
    + pp.Optional(logic_oper + query_expr_logic)
)
query_expr_full = query_expr_logic | pp.nestedExpr(content=query_expr_logic)