def _parse_phoenixgroup():
    """Build the parser for a single line containing a group definition.

    The line is matched as a leading alphanumeric word, which is consumed but
    dropped from the results, followed by a single-quoted string whose
    unquoted content is exposed under the results name ``"name"``.

    Returns:
        The pyparsing expression matching one group-definition line.
    """
    # Work on a copy: setParseAction() modifies the expression it is called
    # on, and sglQuotedString is a shared object that other grammars attach
    # their own parse actions to.
    group_name = (
        sglQuotedString.copy()
        .setParseAction(removeQuotes)
        .setResultsName("name")
    )
    return Suppress(Word(alphanums)) + group_name
def get_grammar(
    save_token_function=lambda: None,
    save_int_function=lambda: None,
    save_decimal_function=lambda: None,
    save_string_function=lambda: None,
    save_ident_function=lambda: None,
    save_list_function=lambda: None,
    save_boolean_function=lambda: None,
):
    """Assemble the expression grammar with the given callbacks as parse actions.

    An operand is a function call, a terminal (boolean, decimal, integer,
    identifier or single-quoted string), an expression wrapped in
    ``lbrace``/``rbrace``, or a bracketed list of operands.  Binary operators
    are layered from tightest to loosest binding as: ``eqop`` (right
    recursive), ``exponent``, ``multdivide``, ``relational``, ``plusminus``
    and finally ``logicop``.

    ``save_token_function`` is attached to function calls and to every
    "operator + right operand" pair; the remaining callbacks are attached to
    the terminal (or list) their name refers to.

    Returns:
        A parser matching one complete expression followed by end of input.
    """

    def _operator_chain(head, operator, tail):
        # head (operator tail)* -- each "operator tail" pair is reported
        # through save_token_function.
        repeated = (operator + tail).setParseAction(save_token_function)
        return head + ZeroOrMore(repeated)

    expression = Forward()
    operand = Forward()

    call_arguments = delimitedList(expression)
    call = (func_name + lbrace + Optional(call_arguments) + rbrace).setParseAction(
        save_token_function
    )
    list_literal = (
        lbracket + Optional(delimitedList(operand)) + rbracket
    ).setParseAction(save_list_function)

    # NOTE(review): setParseAction() is applied to the module-level terminals
    # themselves (no copy), so a later call with different callbacks would
    # presumably re-wire grammars built earlier -- confirm with callers.
    literal = (
        BOOLEAN.setParseAction(save_boolean_function)
        | decimal.setParseAction(save_decimal_function)
        | integer.setParseAction(save_int_function)
        | ident.setParseAction(save_ident_function)
        | sglQuotedString.setParseAction(save_string_function)
    )
    parenthesised = lbrace + expression + rbrace
    operand <<= call | literal | parenthesised | list_literal

    # The equality tier recurses into itself on the right-hand side.
    equality = Forward()
    equality <<= _operator_chain(operand, eqop, equality)

    # Every further tier has the same shape: tier (operator tier)*.
    tier = equality
    for operator in (exponent, multdivide, relational, plusminus):
        tier = _operator_chain(tier, operator, tier)
    expression <<= _operator_chain(tier, logicop, tier)

    # The whole input must be consumed by a single expression.
    return expression + StringEnd()
def parse_javascript_vars(data):
    '''Parse a string of JavaScript-like assignments into a dict.

    Returns a dict mapping each variable name to its value.  Numbers become
    int or float, strings become str (quotes removed), arrays become lists
    and dictionaries become dicts whose numeric keys stay numeric.

    Several assumptions are made:

    - Only the assignment operator '=' is supported.
    - The script is composed of one or more assignments, and nothing else.
    - The "var " prefix before an assignment is optional.
    - No variable is assigned more than once (if one is, the last
      assignment wins).
    - Comments should be correctly ignored, as well as whitespace.
    - Values can be numbers, strings, arrays or dictionaries.
    - Arrays and dictionaries can only contain numbers and strings.
    - Dictionary keys can be numbers, strings, or an identifier.

    Sample input for this grammar:

        var i = 0;  // Optional var, optional semicolon.
        j = 0x10  // 16
        k = -010  // -8
        f = 1.0
        g = +.9  // Optional leading 0, optional sign.
        s = 'single quoted'
        t = "double quoted"
        a = []
        b = [0, 1, 'string', "double", 3.14]
        c = {}
        d = {
            foo: 'without quotes',
            'bar': "as a string",
            3: 'as a number'
        }

    This code can parse cgi_adsl_info.cgi, but it can't parse
    cgi_atm_info.cgi.

    Raises whatever pyparsing's parseString() raises when the input does not
    match the grammar.
    '''
    from pyparsing import (
        Combine, Group, Keyword, LineEnd, OneOrMore, Optional, StringEnd,
        Suppress, White, Word, alphanums, alphas, cppStyleComment,
        dblQuotedString, dblSlashComment, delimitedList, hexnums, nums,
        removeQuotes, sglQuotedString)

    # AKA identifier.
    varname = Word(alphas + '_$', alphanums + '_$')

    # The Optional(Suppress(White())) is required because of the first-match
    # operator used when defining number.
    number_signal = Optional(Word('-+', exact=1)) + Optional(Suppress(White()))
    decimal_number = number_signal + Word('123456789', nums)
    # Scientific notation is not supported.
    float_number = number_signal + Optional(Word(nums)) + '.' + Word(nums)
    # For convenience, zero is considered an octal number.
    octal_number = number_signal + Word('0', '01234567')
    hex_number = number_signal + '0x' + Word(hexnums)
    number = Combine(
        float_number | decimal_number | hex_number | octal_number)

    def convert_number(toks):
        # Turn the combined number text into a Python int or float.
        text = toks[0]
        sign = text[0] if text[0] in '+-' else ''
        digits = text[1:] if sign else text
        if '.' in text:
            return float(text)
        if digits.startswith('0x'):
            return int(sign + digits[2:], base=16)
        if digits.startswith('0'):
            return int(text, base=8)
        return int(text, base=10)

    number.setParseAction(convert_number)

    # Copy the quoted-string expressions before attaching a parse action:
    # setParseAction() modifies the expression in place and these objects
    # are shared with every other grammar that uses them.
    string = (dblQuotedString.copy().setParseAction(removeQuotes) |
              sglQuotedString.copy().setParseAction(removeQuotes))

    scalar = number | string

    # Nested arrays/dicts are not supported.
    #
    # Both container kinds are converted to native Python objects right in
    # the grammar.  Deciding afterwards from the token shape is ambiguous:
    # an empty array looks like an empty dictionary, and an array of
    # two-character strings looks like a list of key/value pairs.
    array_list = Group(
        Suppress('[') + Optional(delimitedList(scalar)) + Suppress(']'))
    # The extra list wrapper keeps the array as ONE token (a Python list)
    # instead of splicing its items into the parent's token list.
    array_list.setParseAction(lambda toks: [toks[0].asList()])

    dict_entry = Group(
        (number | string | varname) + Suppress(':') + scalar)
    array_associative = Group(
        Suppress('{') + Optional(delimitedList(dict_entry)) + Suppress('}'))
    # Building the dict here keeps numeric keys as numbers.
    array_associative.setParseAction(
        lambda toks: [dict((entry[0], entry[1]) for entry in toks[0])])

    value = number | string | array_list | array_associative

    assignment = Group(
        Optional(Suppress(Keyword('var'))) + varname + Suppress('=') +
        value + Suppress(';' | LineEnd()))

    parser = OneOrMore(assignment) + StringEnd()
    parser.ignore(dblSlashComment)
    parser.ignore(cppStyleComment)

    tree = parser.parseString(data)

    # Every group is [name, value] and the value is already a plain Python
    # object, so no ParseResults conversion is needed here.
    ret = {}
    for name_and_value in tree:
        ret[name_and_value[0]] = name_and_value[1]
    return ret