def list_dict(word=word_free):
    """
    Return the pyparsing lexical element, that parses a string either
    as a list or as a dictionary.

    Parameters
    ----------
    word : lexical element
        A custom lexical element for word.

    Returns
    -------
    ld : lexical element
        The returned lexical element parses a string in the form
        ``..., ..., ...`` or ``key1:..., key2=..., key3: ...``
        where ``...`` is a ``list_item`` from
        :func:`get_standard_type_defs()` and interprets it as a list or
        a dictionary.
    """
    defs = get_standard_type_defs(word)
    i = defs['list_item']

    # Bare item: wrapped in a 1-tuple so it can be told apart from the
    # (key, value) 2-tuples produced by `narg` in the final parse action.
    arg = i.copy()
    arg.setParseAction(lambda t: (t[0],))

    # Named item: ``key : value`` or ``key = value`` -> (key, value).
    narg = word_strict + (colon | equal_sign) + i
    narg.setParseAction(lambda t: (t[0], t[1]))

    # Split the mixed sequence: 1-tuples feed the list part, 2-tuples the
    # dict part of the returned (list, dict) pair.
    ld = Group(list_of(narg | arg))
    ld.setParseAction(lambda t: ([x[0] for x in t[0] if len(x) == 1],
                                 dict([x for x in t[0] if len(x) > 1]))
                      )
    return ld
def _read_solution(scenario,log,task_to_id,id_to_resource) : S = scenario # parse output from pyparsing import Keyword,Literal,Word,alphas,nums,printables,OneOrMore,ZeroOrMore,dblQuotedString,Group INT = Word( nums ) int_row = Group( INT + Literal(",").suppress() + \ INT + Literal(",").suppress() + \ INT + Literal(";").suppress() ) plan = Group( Group( ZeroOrMore(int_row) ) ) start_str, end_str = '##START_SOLUTION##', '##END_SOLUTION##' start_i, end_i = log.index(start_str)+len(start_str), log.index(end_str) opl_plan = plan.parseString(log[start_i:end_i]) int_plan = opl_plan[0][0] # get starts and resource assignments starts = dict() assign = dict() for row in int_plan : task_id = int(row[0]) starts[task_id] = int(row[2]) if task_id not in assign : assign[task_id] = list() assign[task_id].append(int(row[1])) # add to scenario for T in S.tasks() : T.start = starts[task_to_id[T]] #second column is start if T.resources is None : T.resources = list() T.resources += [ id_to_resource[j] for j in assign[task_to_id[T]] ]
def parseEqun(equation):
    """Parse a chemical equation such as ``H2 + O2 -> H2O``.

    Parameters
    ----------
    equation : str
        Formulas joined by ``+`` on each side, sides separated by ``->``.

    Returns
    -------
    (lhsDict, rhsDict) : tuple of dict
        Each maps a formula string from that side to a ``Counter`` of
        element symbol -> atom count.
    """
    # Grammar for the equation: formulas joined by '+', sides by '->'.
    cForm = Word(ascii_uppercase, ascii_uppercase + ascii_lowercase + digits)
    equnExpr = Group(ZeroOrMore(cForm + Suppress('+')) + cForm)
    lhs = equnExpr.setResultsName('lhs')
    rhs = equnExpr.setResultsName('rhs')
    chemicalEqun = lhs + "->" + rhs
    parsedEqun = chemicalEqun.parseString(equation)

    # Grammar for one formula: element symbols with optional counts
    # (count defaults to 1 when omitted).
    element = Word(ascii_uppercase, ascii_lowercase)
    integer = Word(digits).setParseAction(lambda x: int(x[0]))
    elementRef = Group(element + Optional(integer, default=1))
    chemicalFormula = OneOrMore(elementRef)

    def countElements(chemicals):
        # Shared helper: the original duplicated this loop for LHS and RHS
        # (and shadowed the parser element `element` with its loop variable).
        counts = {}
        for chemical in chemicals:
            counts[chemical] = Counter()
            for symbol, qty in chemicalFormula.parseString(chemical):
                counts[chemical][symbol] += qty
        return counts

    return (countElements(parsedEqun['lhs'].asList()),
            countElements(parsedEqun['rhs'].asList()))
def parseReactions(reaction):
    """Parse a reaction string of the form ``A() [+ B()] -> C() [+ D()] rate``.

    Each side is one or two species followed by empty parentheses; the
    trailing rate token is discarded. Returns the parsed tokens as a list
    of two groups (reactants, products).
    """
    species = Word(alphanums)
    # One side of the reaction: a species, optionally '+' and a second one.
    side = Group(species + Suppress('()') +
                 Optional(Suppress('+') + species + Suppress("()")))
    # Arrow separates the sides; the final rate-like token is suppressed.
    grammar = side + Suppress("->") + side + Suppress(Word(alphanums + "()"))
    return grammar.parseString(reaction).asList()
def getLogLineBNF():
    """Return the pyparsing grammar for one web-server access-log line.

    The grammar is built lazily and cached in the module-global
    ``logLineBNF``.  NOTE: uses Python 2 ``string.uppercase`` /
    ``string.lowercase``.
    """
    global logLineBNF
    if logLineBNF is None:
        integer = Word( nums )
        ipAddress = delimitedList( integer, ".", combine=True )
        timeZoneOffset = Word("+-",nums)
        month = Word(string.uppercase, string.lowercase, exact=3)
        # "[dd/Mon/yyyy:hh:mm:ss +zzzz]" — brackets suppressed.
        serverDateTime = Group( Suppress("[") +
                                Combine( integer + "/" + month + "/" + integer +
                                        ":" + integer + ":" + integer + ":" + integer ) +
                                timeZoneOffset +
                                Suppress("]") )

        logLineBNF = ( ipAddress.setResultsName("ipAddr") +
                       Suppress("-") +
                       ("-" | Word( alphas+nums+"@._" )).setResultsName("auth") +
                       serverDateTime.setResultsName("timestamp") +
                       # request line is post-processed by getCmdFields
                       dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields) +
                       (integer | "-").setResultsName("statusCode") +
                       (integer | "-").setResultsName("numBytesSent") +
                       dblQuotedString.setResultsName("referrer").setParseAction(removeQuotes) +
                       dblQuotedString.setResultsName("clientSfw").setParseAction(removeQuotes) )
    return logLineBNF
def ifParser():
    """Build a parser for a file of labeled rules.

    Each rule has the form ``#lb=<name>,type=<category>`` followed by the
    rule itself (parsed by ``ruleParser()``, defined elsewhere).  Lines
    starting with ``##`` are ignored as comments.

    NOTE: Python 2 code (``print`` statements in the debug parse action).
    """
    comma = Literal(",").suppress()
    hash = Literal("#").suppress()
    equal = Literal("=").suppress()

    # Rules and labels
    rulename = Word (alphanums + "_")
    rulecategory = oneOf("Protocol_Rules Invariant_Rules Decomposition_Rules Intruder_Rules Init Goal")
    label = hash + Literal("lb") + equal + rulename + comma + Literal("type") + equal + rulecategory
    labeledrule = Group(label) + Group(ruleParser())

    # Debug hook: dump every Protocol_Rules rule as it is parsed.
    def labeledruleAction(s,l,t):
        if t[0][3] == "Protocol_Rules":
            print "-----------------"
            print "- Detected rule -"
            print "-----------------"
            print t[0]
            print t[1]
            print
    labeledrule.setParseAction(labeledruleAction)

    # A complete file
    parser = OneOrMore(labeledrule)
    parser.ignore("##" + restOfLine)
    return parser
def define_identifier(self): """ Return the syntax definition for an identifier. """ # --- Defining the individual identifiers: # Getting all the Unicode numbers in a single string: unicode_numbers = "".join([unichr(n) for n in xrange(0x10000) if unichr(n).isdigit()]) unicode_number_expr = Regex("[%s]" % unicode_numbers, re.UNICODE) space_char = re.escape(self._grammar.get_token("identifier_spacing")) identifier0 = Regex("[\w%s]+" % space_char, re.UNICODE) # Identifiers cannot start with a number: identifier0 = Combine(~unicode_number_expr + identifier0) identifier0.setName("individual_identifier") # --- Defining the namespaces: namespace_sep = Suppress(self._grammar.get_token("namespace_separator")) namespace = Group(ZeroOrMore(identifier0 + namespace_sep)) namespace.setName("namespace") # --- The full identifier, which could have a namespace: identifier = Combine(namespace.setResultsName("namespace_parts") + identifier0.setResultsName("identifier")) identifier.setName("full_identifier") return identifier
def parse_connection_str(connstr):
    """Parse a pipeline connection string.

    Syntax: ``head(outs) -> middle(ins|outs) -> ... -> tail(ins)``; several
    such chains may be joined with ``;``.  The parsed result is validated
    with ``check_numconnections`` before being returned.
    """
    ## Grammar for connection syntax
    digits="0123456789"
    othervalid="_.@"
    identifier= Word(alphas+digits+othervalid)
    nodename=identifier.setResultsName('nodename')

    outputnames = delimitedList( identifier ).setResultsName('outputnames')
    inputnames = delimitedList( identifier ).setResultsName('inputnames')

    # middle nodes have both inputs and outputs
    middlenode= Group( nodename + Suppress('(') + inputnames +
                       Optional( "|" + outputnames) + Suppress(")") ).setResultsName('middlenode')
    # first node has only outputs
    headnode = (nodename + Suppress("(") + outputnames + Suppress(")")).setResultsName('headnode')
    # last node has only inputs
    tailnode = (nodename + Suppress("(") + inputnames + Suppress(")")).setResultsName('tailnode')

    # connect head -> [middle ->] tail
    # FollowedBy("->") keeps the tail node from being consumed as a middle node.
    connect= Group( headnode + Group(ZeroOrMore(Suppress("->") \
                    + middlenode + FollowedBy("->") )).setResultsName('middlenodes') \
                    + Suppress("->")+tailnode).setResultsName('nodes')
    connectlist = Group( connect + ZeroOrMore( Suppress(";")\
                         + connect )).setResultsName('connects')
    parsed=connectlist.parseString(connstr)
    check_numconnections(parsed)
    return parsed
def build_select_grammar():
    """Build a pyparsing grammar for a simplified SQL SELECT:

    ``select <*|col,...> from <table> [where a op b]
    [order by cols [asc|desc]] [limit [offset,] rows]``

    ``identifier_token`` and ``table_name_token`` are defined elsewhere in
    this module.
    """
    select_grammar = Forward()
    select_keyword_token = Keyword("select", caseless=True)
    from_keyword_token = Keyword("from", caseless=True)
    limit_keyword_token = Keyword("limit", caseless=True)
    order_by_keyword_token = Keyword("order by", caseless=True)
    where_keyword_token = Keyword("where", caseless=True)
    operators_tokens = oneOf("= != < > >= <=")

    column_name_tokens = Group(delimitedList(identifier_token, ","))

    # ORDER BY <cols> [asc|desc]
    order_by_token = order_by_keyword_token + column_name_tokens.setResultsName("order_by_cols")\
                     + Optional(
                         (Keyword("asc", caseless=True).setResultsName("order_by_type")
                          | Keyword("desc", caseless=True).setResultsName("order_by_type"))
                     )

    # LIMIT [offset,] rows
    limit_token = limit_keyword_token + Optional(Word(nums).setResultsName("offset") + Literal(",")) \
                  + Word(nums).setResultsName("rows_limit")

    # WHERE <identifier> <op> <value>
    where_expression = where_keyword_token + identifier_token.setResultsName("operand_left") \
                       + operators_tokens.setResultsName("operator") + Word(alphanums).setResultsName("operand_right")

    select_grammar << select_keyword_token + ('*' | column_name_tokens).setResultsName("columns")\
                      + from_keyword_token + table_name_token.setResultsName("table")\
                      + Optional(where_expression).setResultsName("where")\
                      + Optional(order_by_token).setResultsName("order")\
                      + Optional(limit_token).setResultsName("limit")
    return select_grammar
def _get_infos_from_file(self, info):
    """Parse objdump-style section-table output for object files.

    Parameters
    ----------
    info : object
        Provides ``content`` (the section-info text) and ``filename``
        (used in error messages).

    Returns
    -------
    pyparsing.ParseResults
        Named result "contents": per object file, its name ("object") and
        the list of section names ("sections").

    Raises
    ------
    ParseException
        If the section info text cannot be parsed.
    """
    # Object file line: '{object}: file format elf32-xtensa-le'
    object = Fragment.ENTITY.setResultsName("object") + Literal(":").suppress() + Literal("file format elf32-xtensa-le").suppress()

    # Sections table header (all column titles suppressed).
    header = Suppress(Literal("Sections:") + Literal("Idx") + Literal("Name") + Literal("Size") +
                      Literal("VMA") + Literal("LMA") + Literal("File off") + Literal("Algn"))
    # One table row: keep only the section name; sizes/addresses/flags suppressed.
    entry = Word(nums).suppress() + Fragment.ENTITY + Suppress(OneOrMore(Word(alphanums, exact=8)) +
                                                              Word(nums + "*") +
                                                              ZeroOrMore(Word(alphas.upper()) + Optional(Literal(","))))

    # Content is object file line + sections table
    content = Group(object + header + Group(ZeroOrMore(entry)).setResultsName("sections"))
    parser = Group(ZeroOrMore(content)).setResultsName("contents")

    sections_info_text = info.content
    results = None

    try:
        results = parser.parseString(sections_info_text)
    except ParseException as p:
        # BUGFIX: ParseException has no ``.message`` attribute on Python 3
        # (that was a Python 2 idiom) — use str(p) for the error text.
        raise ParseException("Unable to parse section info file " + info.filename + ". " + str(p))

    return results
def _define_context_component (self, cluster, base_feature_set):
    """Build the grammar for a context component: any mix of clusters and
    base feature sets surrounding exactly one source placeholder.

    ``^`` (Or) tries both alternatives and keeps the longest match.
    """
    placeholder = Literal(SOURCE_PLACEHOLDER)
    placeholder.setParseAction(self._handle_placeholder)
    context_component = Group(ZeroOrMore(cluster ^ base_feature_set) + \
                              placeholder + ZeroOrMore(cluster ^ base_feature_set)).setResultsName('context_component')
    context_component.setParseAction(self._handle_context_component)
    return context_component
def parse_showhdinfo(stdout, stderr):
    """Parse the output of ``VBoxManage showhdinfo``.

    Parameters
    ----------
    stdout : str
        The command's standard output ("<label>: <value>" lines).
    stderr : str
        Unused; kept so all parsers share the same signature.

    Returns
    -------
    dict
        Keys: uuid, accessible, logical_size, current_size, type,
        storage_format, format_variant, location.
    """
    # Each prefix suppresses the label; the value after it is captured
    # under a results name.  (Word('UUID:') matches a run of those
    # characters, not the literal phrase — adequate for this fixed output.)
    uuid_prefix = Suppress(Word('UUID:'))
    id_uuid = Word(alphanums + '-').setResultsName('uuid')
    accessible_prefix = Suppress(Word('Accessible:'))
    id_accessible = Word(alphas).setResultsName('accessible')
    logical_size_prefix = Suppress(Word('Logical size:'))
    id_logical_size = Word(alphanums + ' ').setResultsName('logical_size')
    current_size_prefix = Suppress(Word('Current size on disk:'))
    id_current_size = Word(alphanums + ' ').setResultsName('current_size')
    type_prefix = Suppress(Word('Type:'))
    id_type = Word(alphas + ' ()').setResultsName('type')
    prefix_storage_format = Suppress(Word('Storage format:'))
    id_storage_format = Word(alphas).setResultsName('storage_format')
    prefix_format_variant = Suppress(Word('Format variant:'))
    id_format_variant = Word(alphanums + ' ').setResultsName('format_variant')
    prefix_location = Suppress(Word('Location:'))
    id_location = Word(alphanums + ' /.').setResultsName('location')

    hd_info = Group(uuid_prefix + id_uuid + EOL +
                    accessible_prefix + id_accessible + EOL +
                    logical_size_prefix + id_logical_size + EOL +
                    current_size_prefix + id_current_size + EOL +
                    type_prefix + id_type + EOL +
                    prefix_storage_format + id_storage_format + EOL +
                    prefix_format_variant + id_format_variant + EOL +
                    prefix_location + id_location + EOL)
    out = hd_info.parseString(stdout)[0]
    return {'uuid': out.uuid,
            'accessible': out.accessible,
            'logical_size': out.logical_size,
            'current_size': out.current_size,
            'type': out.type,
            'storage_format': out.storage_format,
            # BUGFIX: was ``out.storage_variant`` — that results name does
            # not exist (always yielded ''); the parsed name is
            # 'format_variant'.
            'format_variant': out.format_variant,
            'location': out.location}
def build_message():
    """(Re)build the module-global ``message`` grammar from the current
    config: ``[:prefix ]command[ params]`` terminated by CRLF — or by
    CR/LF/CRLF when ``soft_eol`` is enabled — optionally allowing
    trailing spaces before the terminator.
    """
    global message
    message = Group(Optional(Suppress(Literal(":")) + prefix + space)) + Group(command) + Group(Optional(params))
    if config.getboolean("parser", "trailing_spaces"):
        message += ZeroOrMore(space)
    if config.getboolean("parser", "soft_eol"):
        message += cr ^ lf ^ crlf
    else:
        message += crlf
    # Whitespace is significant in this protocol, so disable skipping.
    message.leaveWhitespace()
def parse(self, filename):
    """Parse a substitution-model report file.

    Extracts the model name, log-likelihood and alpha via ``self.common``,
    then derives base frequencies and exchange rates per model (JC69, K80,
    F81, F84, HKY85, TN93, GTR) using the ``self.tstv``, ``self.freq`` and
    ``self.rates`` sub-parsers.  Parse failures are logged, not raised.

    Returns
    -------
    tuple
        ``(model, alpha, lnl, freq, rates)``; entries stay ``None`` when
        the relevant sub-parse fails or the model is unknown.
    """
    model = None
    alpha = None
    lnl = None
    freq = None
    rates = None
    with open(filename) as fl:
        s = fl.read()
    try:
        model, lnl, alpha = self.common.parseString(s).asList()
    except ParseException as err:
        logger.error(err)
    if model == 'JC69':
        # Equal frequencies and equal rates by definition.
        freq = [0.25, 0.25, 0.25, 0.25]
        rates = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
    elif model == 'K80':
        freq = [0.25, 0.25, 0.25, 0.25]
        try:
            tstv = self.tstv.parseString(s).asList()
        except ParseException as err:
            logger.error(err)
        rates = [1.0, tstv[0], 1.0, 1.0, tstv[0], 1.0]
    elif model == 'F81':
        try:
            freq = self.freq.parseString(s).asList()
        except ParseException as err:
            logger.error(err)
        rates = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
    elif model == 'F84' or model == 'HKY85' or model == 'TN93':
        parser = Group(self.tstv) + Group(self.freq)
        try:
            tstv, freq = parser.parseString(s).asList()
        except ParseException as err:
            logger.error(err)
        if model == 'TN93':
            # TN93 distinguishes the two transition rates.
            rates = [1.0, tstv[0], 1.0, 1.0, tstv[1], 1.0]
        else:
            rates = [1.0, tstv[0], 1.0, 1.0, tstv[0], 1.0]
    elif model == 'GTR':
        parser = Group(self.freq) + Group(self.rates)
        try:
            freq, rates = parser.parseString(s).asList()
        except ParseException as err:
            logger.error(err)
    return model, alpha, lnl, freq, rates
def get_parser():
    """Return a lyrics file parser.
    @see grammar.md for the whole grammar.

    The grammar (leading comment lines followed by one or more sections)
    is built lazily and cached on ``Lyrics._parser``.
    """
    if Lyrics._parser is None:
        # Parser not yet defined. Defining it.
        comment_line = COMMENT_SIGN + WORDS + EOL
        comments = Group(ZeroOrMore(comment_line))
        section = Section.get_parser()
        sections = section + ZeroOrMore(EOL + section)
        Lyrics._parser = comments.setResultsName("comments") + sections.setResultsName("sections")
    return Lyrics._parser
def _getPattern(self):
    """Build the full expression grammar: arithmetic inside comparisons
    inside AND/OR logic, anchored to the end of the string.

    Operands are function calls, ``{aggregate}`` / ``[single]`` column
    references, numbers, quoted strings, the current value ``.`` or bare
    identifiers; each operand kind is evaluated through its own
    ``self.__eval*`` parse action.
    """
    arith_expr = Forward()
    comp_expr = Forward()
    logic_expr = Forward()
    LPAR, RPAR, SEMI = map(Suppress, "();")
    identifier = Word(alphas+"_", alphanums+"_")
    multop = oneOf('* /')
    plusop = oneOf('+ -')
    expop = Literal( "^" )
    compop = oneOf('> < >= <= != ==')
    andop = Literal("AND")
    orop = Literal("OR")
    current_value = Literal( "." )
    assign = Literal( "=" )
    # notop = Literal('NOT')
    function = oneOf(' '.join(self.FUNCTIONS))
    function_call = Group(function.setResultsName('fn') + LPAR + Optional(delimitedList(arith_expr)) + RPAR)
    aggregate_column = QuotedString(quoteChar='{', endQuoteChar='}')
    single_column = QuotedString(quoteChar='[', endQuoteChar=']')
    integer = Regex(r"-?\d+")
    real = Regex(r"-?\d+\.\d*")

    # quotedString enables strings without quotes to pass
    # Order matters: more specific alternatives come first.
    operand = \
        function_call.setParseAction(self.__evalFunction) | \
        aggregate_column.setParseAction(self.__evalAggregateColumn) | \
        single_column.setParseAction(self.__evalSingleColumn) | \
        ((real | integer).setParseAction(self.__evalConstant)) | \
        quotedString.setParseAction(self.__evalString).addParseAction(removeQuotes) | \
        current_value.setParseAction(self.__evalCurrentValue) | \
        identifier.setParseAction(self.__evalString)

    # Precedence: ^ binds tighter than * /, which bind tighter than + -.
    arith_expr << operatorPrecedence(operand,
        [
         (expop, 2, opAssoc.LEFT, self.__expOp),
         (multop, 2, opAssoc.LEFT, self.__multOp),
         (plusop, 2, opAssoc.LEFT, self.__addOp),
        ])
    # comp_expr = Group(arith_expr + compop + arith_expr)
    comp_expr << operatorPrecedence(arith_expr,
        [
            (compop, 2, opAssoc.LEFT, self.__evalComparisonOp),
        ])
    logic_expr << operatorPrecedence(comp_expr,
        [
            (andop, 2, opAssoc.LEFT, self.__evalLogicOp),
            (orop, 2, opAssoc.LEFT, self.__evalLogicOp)
        ])
    # StringEnd() forces the whole input to be consumed.
    pattern = logic_expr + StringEnd()
    return pattern
def split_chemical_formula(formula):
    """Parse *formula* into parsed ``[symbol, qty]`` element groups.

    Parentheses, dashes and dots are treated as separators and replaced
    before parsing.  Returns ``[["", 0]]`` for empty or purely numeric
    input.  Each parsed element group is annotated with a ``weight``
    entry (atomic weight * quantity) via a parse action.
    """
    def is_number(s):
        # True if *s* parses as a float.
        try:
            float(s)
            return True
        except ValueError:
            return False

    def replace_things(stringg, listt, replacement):
        # Replace every substring in *listt* occurring in *stringg*.
        for x in listt:
            stringg = stringg.replace(x, replacement)
        return stringg

    bad_chars = ["(", ")", "-", "."]
    formula = replace_things(formula, bad_chars, "|")

    # Degenerate inputs: nothing to split.
    if is_number(formula):
        return [["", 0]]
    if len(formula) == 0:
        return [["", 0]]

    # define some strings to use later, when describing valid lists
    # of characters for chemical symbols and numbers
    caps = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    lowers = caps.lower()
    digits = "0123456789"

    # Auto-convert quantity tokens to int.
    def convertIntegers(tokens):
        return int(tokens[0])

    element = Word(caps, lowers)
    integer = Word(digits).setParseAction(convertIntegers)
    elementRef = Group(element("symbol") + Optional(integer, default=1)("qty"))
    chemicalFormula = OneOrMore(elementRef)

    # Compute the partial molecular weight per element as a parse action —
    # no need to redefine the grammar, just attach it to elementRef.
    def computeElementWeight(tokens):
        element = tokens[0]
        element["weight"] = atomicWeight[element.symbol] * element.qty

    elementRef.setParseAction(computeElementWeight)

    # FIX: the original also summed the per-element weights into a local
    # ``mw`` that was never used or returned; that dead computation is gone.
    return chemicalFormula.parseString(formula)
def parse(str): tokens = '' # define SQL tokens selectStmt = Forward() selectToken = Keyword("select", caseless=True) fromToken = Keyword("from", caseless=True) ident = Word( alphas, alphanums + "_$" ).setName("identifier") columnName = Upcase( delimitedList( ident, ".", combine=True ) ) columnNameList = Group( delimitedList( columnName ) ) tableName = Upcase( delimitedList( ident, ".", combine=True ) ) tableNameList = Group( delimitedList( tableName ) ) whereExpression = Forward() and_ = Keyword("and", caseless=True) or_ = Keyword("or", caseless=True) in_ = Keyword("in", caseless=True) E = CaselessLiteral("E") binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) arithSign = Word("+-",exact=1) realNum = Combine( Optional(arithSign) + ( Word( nums ) + "." + Optional( Word(nums) ) | ( "." + Word(nums) ) ) + Optional( E + Optional(arithSign) + Word(nums) ) ) intNum = Combine( Optional(arithSign) + Word( nums ) + Optional( E + Optional("+") + Word(nums) ) ) columnRval = realNum | intNum | quotedString | columnName # need to add support for alg expressions whereCondition = Group( ( columnName + binop + columnRval ) | ( columnName + in_ + "(" + delimitedList( columnRval ) + ")" ) | ( columnName + in_ + "(" + selectStmt + ")" ) | ( "(" + whereExpression + ")" ) ) whereExpression << whereCondition + ZeroOrMore( ( and_ | or_ ) + whereExpression ) # define the grammar selectStmt << ( selectToken + ( '*' | columnNameList ).setResultsName( "columns" ) + fromToken + tableNameList.setResultsName( "tables" ) + Optional( Group( CaselessLiteral("where") + whereExpression ), "" ).setResultsName("where") ) simpleSQL = selectStmt # define Oracle comment format, and ignore them oracleSqlComment = "--" + restOfLine simpleSQL.ignore( oracleSqlComment ) try: tokens = simpleSQL.parseString( str ) except ParseException, err: print " "*err.loc + "^\n" + err.msg print err
def read_tgf(path):
    """Generates an alias.ArgumentationFramework from a Trivial Graph Format (.tgf) file.

    Trivial Graph Format (TGF) is a simple text-based file format for describing graphs. \
    It consists of a list of node definitions, which map node IDs to labels, followed by \
    a list of edges, which specify node pairs and an optional edge label. \
    Node IDs can be arbitrary identifiers, whereas labels for both nodes and edges are plain strings.

    Parameters
    ----------
    path : file or string
        File, directory or filename to be read.

    Returns
    -------
    framework : alias ArgumentationFramework

    Examples
    --------

    References
    ----------
    http://en.wikipedia.org/wiki/Trivial_Graph_Format

    NOTE: Python 2 code (``except ParseException, e``); non-string *path*
    returns None silently, and the opened file is never closed.
    """
    try:
        from pyparsing import Word, alphanums, ZeroOrMore, White, Suppress, Group, ParseException, Optional
    except ImportError:
        raise ImportError("read_tgf requires pyparsing")

    if not isinstance(path, str):
        return

    # Define tgf grammar: node lines, a '#' separator, then attack lines
    # "arg arg [label]".
    s = White(" ")
    tag = Word(alphanums)
    arg = Word(alphanums)
    att = Group(arg + Suppress(s) + arg + Optional(Suppress(s) + tag))
    nl = Suppress(White("\n"))

    graph = Group(ZeroOrMore(arg + nl)) + Suppress("#") + nl + Group(ZeroOrMore(att + nl) + ZeroOrMore(att))

    f = open(path, 'r')
    f = f.read()

    # The framework is named after the file's basename.
    head, tail = ntpath.split(path)
    framework = al.ArgumentationFramework(tail)

    try:
        parsed = graph.parseString(f)
    except ParseException, e:
        raise al.ParsingException(e)
def build_message():
    """Rebuild the module-global ``message`` grammar from config.

    A message is ``[:prefix ]command[ params]`` ending in CRLF; with
    ``soft_eol`` any of CR, LF or CRLF terminates it, and
    ``trailing_spaces`` permits spaces before the terminator.
    """
    global message
    message = Group(Optional(Suppress(Literal(':')) + prefix + space)) + \
              Group(command) + \
              Group(Optional(params))
    if config.getboolean('parser', 'trailing_spaces'):
        message += ZeroOrMore(space)
    if config.getboolean('parser', 'soft_eol'):
        message += cr ^ lf ^ crlf
    else:
        message += crlf
    # Keep whitespace significant — the grammar handles spaces explicitly.
    message.leaveWhitespace()
def parseReactions(reaction):
    """Parse a BNGL-style species string into nested token lists.

    A species is one or more molecules joined by '.', each molecule being
    a name with an optional parenthesised, comma-separated component list;
    a component may carry a '~state' and/or a '!bond' suffix.
    """
    name = Word(alphanums + "_")
    state = Optional(Group('~' + Word(alphanums + "_")))   # internal state
    bond = Optional(Group('!' + Word(alphanums + '+?')))   # bond label
    component = name + state + bond
    component_list = (Suppress('(') + Group(component) +
                      ZeroOrMore(Suppress(',') + Group(component)) +
                      Suppress(')'))
    molecule = name + Optional(component_list)
    species = Group(molecule) + ZeroOrMore(Suppress('.') + Group(molecule))
    return species.parseString(reaction).asList()
def get_fragment_grammar():
    """Build the grammar for a sections fragment.

    Matches ``[sections:NAME]`` followed by ``entries: <entry>...`` and
    produces a ``Sections`` object from the captured name and entries.
    '#' starts a comment anywhere in the fragment.
    """
    # Header line: "[sections:NAME]" — only the name is captured.
    fragment_name = Fragment.IDENTIFIER.setResultsName("name")
    header = Suppress("[") + Suppress("sections") + Suppress(":") + fragment_name + Suppress("]")

    # Entry list: "entries:" followed by one or more section-name words.
    section_entry = Word(alphanums + "+" + ".")
    entry_list = Suppress("entries") + Suppress(":") + Group(OneOrMore(section_entry)).setResultsName("entries")

    fragment = Group(header + entry_list)
    fragment.setParseAction(lambda toks: Sections(toks[0].name, toks[0].entries))
    fragment.ignore("#" + restOfLine)
    return fragment
def main(s):
    """Parse chemical formula *s* — single-letter element symbols, nested
    parenthesised groups, optional integer multipliers — and return the
    processed top-level result."""
    lpar = Literal('(').suppress()
    rpar = Literal(')').suppress()
    integer = Word(nums)
    element = Word(alphas, exact=1)   # single-letter symbols only
    formula = Forward()
    # A term is an element or a parenthesised subformula, followed by an
    # optional multiplier that defaults to 1.
    term = Group((element | Group(lpar + formula + rpar)('subgroup')) + Optional(integer, default=1)('mult'))
    formula << OneOrMore(term)
    # Parse actions (defined elsewhere) transform the tokens bottom-up.
    integer.setParseAction(process_integer)
    term.setParseAction(process_term)
    formula.setParseAction(process_formula)
    return formula.parseString(s)[0]
def SearchSyntax():
    """Build the search-query grammar.

    A query is any sequence of ``field:value[:extra]`` filters (collected
    under 'filters') and free-text terms, possibly quoted (collected
    under 'text_terms').
    """
    no_colon_chars = printables.replace(':', '')
    colon = Literal(":").suppress()

    # field:value with an optional third, possibly quoted, part.
    filter_term = Group(
        Word(no_colon_chars) + colon + Word(no_colon_chars)
        + Optional(colon + (QuotedString('"', "\\") | Word(printables)))
    ).setResultsName('filters')
    filter_term.modalResults = False  # accumulate every match under 'filters'

    # Anything else: a quoted phrase (quotes kept) or a bare word.
    text_term = (QuotedString('"', "\\", unquoteResults=False)
                 | Word(printables)).setResultsName('text_terms')
    text_term.modalResults = False

    return ZeroOrMore(filter_term | text_term)
def parser(text): """ str := \w+ str := '\w+' exp := Var=str exp := exp & exp exp := exp ^ exp """ # grammar #g_string = "'"+Word(alphas)+"'" | Word(alphas) g_quote = Literal("'").suppress() g_text = Regex("[\w\s\:\#\.]+").setResultsName("text") g_string = Optional(g_quote) + g_text + Optional(g_quote) g_equ = Literal("!=").setResultsName("connector") | Literal("=").setResultsName("connector") g_amp = Literal("&").setResultsName("connector") g_hat = Literal("^").setResultsName("connector") g_or = Literal("|").suppress() g_seq = Literal("->").setResultsName("connector") g_hash = Literal("#").setResultsName("hash") g_left_brack = Literal("[").suppress() g_right_brack = Literal("]").suppress() g_vals = Forward() g_vals << g_string + ZeroOrMore(Group(g_or + g_vals).setResultsName("or_group")) # working """ exp_basic = Group(Optional(g_hash) + g_string).setResultsName("left") + g_equ + Group(g_vals).setResultsName("right") exp = Group(exp_basic) exp = exp.setResultsName("left") + g_amp + exp.setResultsName("right") | \ g_left_brack + exp.setResultsName("left") + g_hat + exp.setResultsName("right") + g_right_brack | \ g_left_brack + exp.setResultsName("left") + g_seq + exp.setResultsName("right") + g_right_brack | \ exp_basic """ # recursion simpleq = Forward() complexq = Forward() exp = (simpleq | complexq).setResultsName("exp") exp_basic = Group(Group(Optional(g_hash) + g_string).setResultsName("left") + g_equ + Group(g_vals).setResultsName("right")) simpleq << (Group(exp_basic.setResultsName("left") + g_amp + simpleq.setResultsName("right")) | exp_basic) complexq << ( Group(g_left_brack + exp.setResultsName("left") + g_hat + exp.setResultsName("right") + g_right_brack) | \ Group(g_left_brack + exp.setResultsName("left") + g_seq + exp.setResultsName("right") + g_right_brack) ) return exp.parseString(text)
def build_insert_grammar():
    """Grammar for: ``insert into <table> (<cols>) values (<values>)``.

    Column list may be empty; values are comma-separated words in which
    spaces are allowed.  Keywords are case-insensitive.
    """
    grammar = Forward()
    kw_insert_into = Keyword("insert into", caseless=True)
    kw_values = Keyword("values", caseless=True)

    column_list = Optional(Group(delimitedList(identifier_token, ",")))
    value_list = Group(delimitedList(Word(alphanums + " "), ","))

    grammar << (
        kw_insert_into
        + table_name_token.setResultsName("table_name")
        + Literal("(") + column_list.setResultsName("columns") + Literal(")")
        + kw_values
        + Literal("(") + value_list.setResultsName("values_list") + Literal(")")
    )
    return grammar
class NodeParser:
    """Parser for ISPD2005 bookshelf ``.nodes`` and ``.pl`` placement files."""

    def __init__(self):
        # Shared lexical elements.
        self.num = Word(nums)
        self.header = Regex(r"^UCLA.*")   # "UCLA ..." banner line
        self.comment = Regex(r"#.*")
        self.bkid = Word(alphanums)       # block (node) identifier

        # .nodes grammar: counts, then "id width height [terminal]" rows.
        self.num_nodes = Literal("NumNodes") + Literal(":") + self.num("NumNodes")
        self.num_terminals = Literal("NumTerminals") + Literal(":") + self.num("NumTerminals")
        self.size = Group(self.num("width") + self.num("height"))
        self.terminal = Optional(Literal("terminal"))
        self.node = self.bkid("id") + self.size("size") + self.terminal
        self.node_grammar = (
            self.header
            + ZeroOrMore(self.comment)
            + self.num_nodes
            + self.num_terminals
            + OneOrMore(self.node)
        )

        # .pl grammar: "id x y : N [/FIXED]" rows.
        self.coordinate = Group(self.num("x") + self.num("y"))
        self.pl = (
            self.bkid("id")
            + self.coordinate("coordinate")
            + Suppress(Literal(": N") + Optional(Literal(r"/FIXED")))
        )
        self.pl_grammar = self.header + ZeroOrMore(self.comment) + OneOrMore(self.pl)

    def compute_chip_size(self, benchmark):
        """Parse the benchmark's .nodes and .pl files and return
        ``(x_max, y_max)`` — the placement extent, i.e. the maximum of
        coordinate + size per axis over all nodes.

        Requires the BENCHMARK environment variable to point at the
        benchmark root directory.
        """
        benchmark_path = pathlib.Path(os.environ["BENCHMARK"])
        node_file = benchmark_path / "ispd2005/{0}/{0}.nodes".format(benchmark)
        pl_file = benchmark_path / "ispd2005/{0}/{0}.pl".format(benchmark)
        print(node_file.as_posix())
        print(pl_file.as_posix())
        x_max = 0
        y_max = 0
        sizes = []
        coordinates = []
        # Collect sizes/coordinates via parse actions while the files parse.
        # BUGFIX: Word(nums) yields strings, so the original appended
        # strings and "coord + size" concatenated instead of adding (and
        # comparing the result to the int 0 raises TypeError on Python 3).
        # Convert to int here.
        self.size.setParseAction(lambda tokens: sizes.append([int(tokens.width), int(tokens.height)]))
        self.coordinate.setParseAction(lambda tokens: coordinates.append((int(tokens.x), int(tokens.y))))
        self.bkid.setParseAction(lambda tokens: print(tokens[0]))
        self.node_grammar.parseFile(node_file.as_posix())
        self.pl_grammar.parseFile(pl_file.as_posix())
        for i in range(len(sizes)):
            print(i)
            if coordinates[i][0] + sizes[i][0] > x_max:
                x_max = coordinates[i][0] + sizes[i][0]
            if coordinates[i][1] + sizes[i][1] > y_max:
                y_max = coordinates[i][1] + sizes[i][1]
        return x_max, y_max
def define_operand(self):
    """
    Return the syntax definition for an operand.

    An operand can be a variable, a string, a number or a set. A set
    is made of other operands, including other sets.

    **This method shouldn't be overridden**. Instead, override the syntax
    definitions for variables, strings and/or numbers.

    If you want to customize the sets, check :meth:`T_SET_START`,
    :meth:`T_SET_END` and :meth:`T_ELEMENT_SEPARATOR`.

    """
    identifier = self.define_identifier()
    # Forward-declared so sets and function arguments can nest operands.
    operand = Forward()

    # Defining the sets:
    set_start = Suppress(self._grammar.get_token("set_start"))
    set_end = Suppress(self._grammar.get_token("set_end"))
    element_separator = self._grammar.get_token("element_separator")
    elements = delimitedList(operand, delim=element_separator)
    set_ = Group(set_start + Optional(elements) + set_end)
    set_.setParseAction(self.make_set)
    set_.setName("set")

    # Defining the variables:
    variable = identifier.copy()
    variable.setName("variable")
    variable.addParseAction(self.make_variable)

    # Defining the functions:
    function_name = identifier.setResultsName("function_name")
    function_name.setName("function_name")
    args_start = Suppress(self._grammar.get_token("arguments_start"))
    args_end = Suppress(self._grammar.get_token("arguments_end"))
    args_sep = self._grammar.get_token("arguments_separator")
    # Empty argument lists default to an empty tuple.
    arguments = Optional(Group(delimitedList(operand, delim=args_sep)), default=())
    arguments = arguments.setResultsName("arguments")
    # Unwrap the Group so make_function sees the bare argument list.
    arguments.setParseAction(lambda tokens: tokens[0])
    function = function_name + args_start + arguments + args_end
    function.setName("function")
    function.setParseAction(self.make_function)

    # Function must be tried before variable, since a function name alone
    # would also match as a variable.
    operand << (function | variable | self.define_number() | \
                self.define_string() | set_)

    return operand
def __init__(self):
    """Build the category-expression grammar (only when pyparsing is
    available): category terms joined by ``and``/``or``/``,`` with
    optional ``not`` and optional parentheses.  Evaluation happens via
    the ``self.__compute_*`` parse actions as matching proceeds.
    """
    if PYPARSING:
        category = Word( alphas + "_-*", alphanums + "_-*" )
        operator = oneOf("and or ,")
        neg_operator = "not"
        elementRef = category
        definition = elementRef + ZeroOrMore( operator + elementRef)
        # Parentheses are optional on both sides and are suppressed.
        nestedformula = Group(Suppress(Optional(Literal("("))) + definition + Suppress(Optional(Literal(")"))))
        neg_nestedformula = Optional(neg_operator) + nestedformula
        self.finalformula = neg_nestedformula + ZeroOrMore( operator + neg_nestedformula)

        elementRef.setParseAction(self.__compute_element)
        neg_nestedformula.setParseAction(self.__compute_neg_formula)
        nestedformula.setParseAction(self.__compute_formula)
        self.finalformula.setParseAction(self.__myreduce)
def parse_startvm(stdout, stderr):
    """Parse ``VBoxManage startvm`` output; return the started VM's uuid
    as ``{'uuid': ...}``.  *stderr* is unused.

    NOTE: ``Word('Waiting for VM')`` matches any run of those characters,
    not the literal phrase — it works here because the fixed text directly
    precedes the quoted UUID.
    """
    # 'Waiting for VM "<uuid>" to power on...'
    waiting_prefix = Word('Waiting for VM')
    waiting_uuid = UUID_STRING.setResultsName('waiting_uuid')
    waiting_postfix = Word('to power on...')
    # 'VM "<uuid>" has been successfully started.'
    success_prefix = Word('VM')
    success_uuid = UUID_STRING.setResultsName('success_uuid')
    success_postfix = Word("has been successfully started.")
    total = Group(waiting_prefix + DBLQUOTE + waiting_uuid + DBLQUOTE +
                  waiting_postfix + EOL +
                  success_prefix + DBLQUOTE + success_uuid + DBLQUOTE +
                  success_postfix)
    out = total.parseString(stdout)[0]
    return {'uuid': out.success_uuid}
def triple(subject, relation, obj) -> ParserElement:
    """Build a simple triple in PyParsing that has a ``subject relation object`` format."""
    # Name each part with its role, then require them in order.
    subject_part = Group(subject)(SUBJECT)
    relation_part = relation(RELATION)
    object_part = Group(obj)(OBJECT)
    return And([subject_part, relation_part, object_part])
import argparse, fileinput, os, os.path, collections # from IPython import embed from pdfrw import PdfReader, PdfWriter from pyparsing import Word, alphas, nestedExpr, CharsNotIn, \ Suppress, ZeroOrMore, Group, Optional, delimitedList # a rudimentary .memo parser Command = Word('\\', alphas) Argument = nestedExpr('{', '}') Key = CharsNotIn('=,') Val = CharsNotIn('=,') Whitespace = Suppress(ZeroOrMore(' ')) Keyval = Group( Whitespace + Key + Optional(Whitespace + Suppress('=') + Whitespace + Val, default=None)) Keylist = delimitedList(Keyval) class CommandCall: def __init__(self, line): self.command = Command.parseString(line)[0] self.arguments = [] for _, start, end in Argument.scanString(line): self.arguments.append( collections.OrderedDict( Keylist.parseString(line[start + 1:end - 1]).asList())) def __str__(self): return self.command + ' ' + "".join(
def rc_statement():
    """
    Generate a RC statement parser that can be used to parse a RC file

    :rtype: pyparsing.ParserElement
    """
    # Comments: single-line // or C-style /* ... */.
    one_line_comment = "//" + restOfLine
    comments = cStyleComment ^ one_line_comment
    # Preprocessor directives (#include, #define, ...): keep whole line.
    precompiler = Word("#", alphanums) + restOfLine
    # e.g. "LANGUAGE LANG_ENGLISH, SUBLANG_DEFAULT"
    language_definition = (
        "LANGUAGE" + Word(alphas + "_").setResultsName("language") +
        Optional("," + Word(alphas + "_").setResultsName("sublanguage")))
    # Blocks are delimited by braces or BEGIN/END keywords interchangeably.
    block_start = (Keyword("{") | Keyword("BEGIN")).setName("block_start")
    block_end = (Keyword("}") | Keyword("END")).setName("block_end")
    reserved_words = block_start | block_end
    # An identifier that must not be a block delimiter keyword.
    name_id = ~reserved_words + Word(alphas, alphanums + "_").setName("name_id")
    numbers = Word(nums)
    # Decimal or hexadecimal integer constant.
    integerconstant = numbers ^ Combine("0x" + numbers)
    constant = Combine(
        Optional(Keyword("NOT")) + (name_id | integerconstant),
        adjacent=False,
        joinString=" ",
    )
    # Style flags ORed together, e.g. "WS_CHILD | WS_VISIBLE".
    combined_constants = delimitedList(constant, "|")
    # Adjacent quoted strings concatenate, C-style.
    concatenated_string = OneOrMore(quotedString)
    # Everything up to an optional CAPTION, the caption itself, then the
    # remainder up to the block body.
    block_options = Optional(
        SkipTo(Keyword("CAPTION"), failOn=block_start)("pre_caption") +
        Keyword("CAPTION") + quotedString("caption")) + SkipTo(block_start)("post_caption")
    # A control we do not model specifically: its id plus a value list.
    undefined_control = (Group(
        name_id.setResultsName("id_control") +
        delimitedList(concatenated_string ^ constant ^ numbers ^
                      Group(combined_constants)).setResultsName("values_")) |
        one_line_comment)
    block = block_start + ZeroOrMore(undefined_control)("controls") + block_end
    # DIALOG / DIALOGEX resource.
    dialog = (name_id("block_id") +
              (Keyword("DIALOGEX") | Keyword("DIALOG"))("block_type") +
              block_options + block)
    string_table = Keyword("STRINGTABLE")("block_type") + block_options + block
    menu_item = Keyword("MENUITEM")("block_type") + (
        commaSeparatedList("values_") | Keyword("SEPARATOR"))
    # POPUP blocks nest recursively, hence the Forward declaration.
    popup_block = Forward()
    popup_block <<= Group(
        Keyword("POPUP")("block_type") + Optional(quotedString("caption")) +
        block_start +
        ZeroOrMore(Group(menu_item | popup_block))("elements") +
        block_end)("popups*")
    menu = (name_id("block_id") + Keyword("MENU")("block_type") +
            block_options + block_start +
            ZeroOrMore(popup_block) + block_end)
    # Longest-match alternation over every top-level RC construct.
    return comments ^ precompiler ^ language_definition ^ dialog ^ string_table ^ menu
node_element = node.setResultsName("node") edge_start = (rel_source | rel_target_left) edge_end = (rel_target_right | rel_source) undirected_edge = rel_source + rel_source undirected_path = rel_source + Suppress(Literal("*")) + rel_source edge_to_right = rel_source + rel_target_right edge_to_left = rel_target_left + rel_source path_to_right = rel_source + Suppress(Literal("*")) + rel_target_right path_to_left = rel_target_left + Suppress(Literal("*")) + rel_source pattern_element = Forward() unfinished_edge = ( Group(edge_to_right + pattern_element)("edge_to_right") | Group(edge_to_left + pattern_element)("edge_to_left") | Group(undirected_edge + pattern_element)("undirected_edge") | Group(path_to_right + pattern_element)("path_to_right") | Group(path_to_left + pattern_element)("path_to_left") | Group(undirected_path + pattern_element)("undirected_path")) pattern_element << (Group(node)("starting_node") + Optional(unfinished_edge)) # Syntax for patterns pattern = delimitedList(Group(pattern_element)) # Syntax for conditions boolean_connective = (and_connective | or_connective) condition_member = (string_literal | integer | floatnumber | Group( var.setResultsName("var") + property_start +
# Modified by Aaron Quinlan, 2012 # from pyparsing import Literal, CaselessLiteral, Word, Upcase, delimitedList, Optional, \ Combine, Group, alphas, nums, alphanums, ParseException, Forward, oneOf, quotedString, \ ZeroOrMore, restOfLine, Keyword # define SQL tokens selectStmt = Forward() selectToken = Keyword("select", caseless=True) fromToken = Keyword("from", caseless=True) # ARQ 2012-Feb-10: allow struct-like column names, e.,g. gt_types.sample1 (add + ".$") ident = Word( alphas, alphanums + "_$" + ".$" ).setName("identifier") columnName = Upcase( delimitedList( ident, ".", combine=True ) ) columnNameList = Group( delimitedList( columnName ) ) tableName = Upcase( delimitedList( ident, ".", combine=True ) ) tableNameList = Group( delimitedList( tableName ) ) whereExpression = Forward() and_ = Keyword("and", caseless=True) or_ = Keyword("or", caseless=True) in_ = Keyword("in", caseless=True) # ARQ 2012-Feb-10: add "like" as an operator like_ = Keyword("like", caseless=True) E = CaselessLiteral("E") # ARQ 2012-Feb-10: add "like" as a binop binop = oneOf("= != < > >= <= eq ne lt le gt ge like", caseless=True) arithSign = Word("+-",exact=1) realNum = Combine( Optional(arithSign) + ( Word( nums ) + "." + Optional( Word(nums) ) |
class SentencePatternMatcher():
    """Match dependency-path patterns against a parsed sentence.

    A pattern is a whitespace-separated string of node tokens like
    ``[tag1:val1;tag2:val2]`` and edge tokens like ``<label`` (move up)
    or ``>label`` (move down).  Python 2 code (print statements).
    """

    # Grammar for one "tag:val" pair, optionally ';'-terminated.
    equals = Literal('=')
    word = Word(alphanums)
    tagAndVal = Group(
        word.setResultsName("tag") + Suppress(":") +
        word.setResultsName("val")).setResultsName("tagAndVal") + Suppress(
            Optional(";"))
    # Grammar for a node token: "[tag1:val1;tag2:val2]".
    token = Suppress("[") + Group(
        ZeroOrMore(tagAndVal)).setResultsName("tagAndVals") + Suppress("]")
    # Grammar for an edge token: "<label" or ">label".
    edge = oneOf(["<", ">"]) + word.setResultsName("label")

    # parse [tag1:val1;tag2:val2] and return python dictionary
    def parseNode(self, tokenString):
        try:
            result = self.token.parseString(tokenString)
        except pyparsing.ParseException:
            print "Path parsing exception: Malformed token string: ", tokenString
        # NOTE(review): if parsing failed above, `result` is unbound here and
        # the loop below raises NameError rather than failing cleanly —
        # confirm whether malformed tokens are expected in practice.
        requirementsDict = {}
        for r in result.tagAndVals:
            requirementsDict[r.tag] = r.val
        return requirementsDict

    # return label from edge definition
    def parseTag(self, tagString):
        try:
            result = self.edge.parseString(tagString)
        except pyparsing.ParseException:
            print "Path parsing exception: Malformed edge string: ", tagString
        # NOTE(review): same unbound-`result` hazard as parseNode above.
        return result.label

    # check sentence against pattern
    def matchPathInSentence(self, startNodeIds, sgpath, s):
        """Walk `sgpath` through sentence `s` starting from `startNodeIds`.

        Returns the node ids reachable at the end of the path, or None if
        any step of the pattern fails to match.
        """
        tokens = sgpath.split(' ')
        # NOTE(review): both names alias the caller's startNodeIds list, so
        # extend() below also mutates the caller's argument — confirm this
        # is intended.
        visitedNodeIds = currentPossibleNodeIds = startNodeIds
        match = 1
        for token in tokens:
            # we are now at a node token: filter candidates by requirements
            if token.startswith("["):
                requirementsList = self.parseNode(token)
                # check all possible current nodes for complying with requirements:
                validIds = []
                for nodeId in currentPossibleNodeIds:
                    if s.checkNodeRequirements(nodeId, requirementsList):
                        validIds.append(nodeId)
                # no node meets those requirements:
                if len(validIds) == 0:
                    match = 0
                    break
                else:
                    currentPossibleNodeIds = validIds
                    visitedNodeIds.extend(currentPossibleNodeIds)
            # we are now at an edge indicating upward traversal
            elif token.startswith("<"):
                requiredLabel = self.parseTag(token)
                # get nodes reachable upward via this label:
                currentNodeIds = s.moveUp(currentPossibleNodeIds, visitedNodeIds, requiredLabel)
                if currentNodeIds is None:
                    # label did not match
                    match = 0
                    break
                else:
                    currentPossibleNodeIds = currentNodeIds
            # we are now at an edge indicating downward traversal
            elif token.startswith(">"):
                requiredLabel = self.parseTag(token)
                currentNodeIds = s.moveDown(currentPossibleNodeIds, visitedNodeIds, requiredLabel)
                if currentNodeIds is None:
                    # label did not match
                    match = 0
                    break
                else:
                    currentPossibleNodeIds = currentNodeIds
        if match == 0:
            currentPossibleNodeIds = None
        return currentPossibleNodeIds
def _create_grammar():
    """Create the DBC grammar.

    Returns a pyparsing element matching a whole DBC file: one or more
    top-level entries (VERSION, NS_, BU_, messages, signals, comments,
    attributes, value tables, ...) followed by end of input.  The ``-``
    operator (And with error stop) is used after each keyword so parse
    errors are reported at the failing entry instead of backtracking.
    """
    # Generic token: any printable run excluding ';' and ':'.
    word = Word(printables.replace(';', '').replace(':', ''))
    integer = Group(Optional('-') + Word(nums))
    positive_integer = Word(nums).setName('positive integer')
    number = Word(nums + '.Ee-+')
    # Punctuation, suppressed from the parse results.
    colon = Suppress(Literal(':'))
    scolon = Suppress(Literal(';'))
    pipe = Suppress(Literal('|'))
    at = Suppress(Literal('@'))
    sign = Literal('+') | Literal('-')
    lp = Suppress(Literal('('))
    rp = Suppress(Literal(')'))
    lb = Suppress(Literal('['))
    rb = Suppress(Literal(']'))
    comma = Suppress(Literal(','))
    # Node names must stay on one line (whitespace limited to ' ').
    node = Word(alphas + nums + '_-').setWhitespaceChars(' ')
    frame_id = Word(nums).setName('frame id')

    version = Group(Keyword('VERSION')
                    - QuotedString('"', multiline=True))
    version.setName(VERSION)

    symbol = Word(alphas + '_') + Suppress(LineEnd())
    symbols = Group(Keyword('NS_')
                    - colon
                    - Group(ZeroOrMore(symbol)))
    symbols.setName('NS_')

    # The BS_ (bit timing) section carries no useful data; discard it.
    discard = Suppress(Keyword('BS_') - colon).setName('BS_')

    nodes = Group(Keyword('BU_')
                  - colon
                  - Group(ZeroOrMore(node)))
    nodes.setName('BU_')

    # SG_ name [mux] : start|length@byte_order sign (scale,offset) [min|max] "unit" receivers
    signal = Group(
        Keyword(SIGNAL)
        - Group(word + Optional(word))
        - colon
        - Group(positive_integer
                - pipe
                - positive_integer
                - at
                - positive_integer
                - sign)
        - Group(lp - number - comma - number - rp)
        - Group(lb - number - pipe - number - rb)
        - QuotedString('"', multiline=True)
        - Group(delimitedList(node)))
    signal.setName(SIGNAL)

    message = Group(
        Keyword(MESSAGE)
        - frame_id
        - word
        - colon
        - positive_integer
        - word
        - Group(ZeroOrMore(signal)))
    message.setName(MESSAGE)

    # EV_ entries are parsed for validity but their contents are discarded.
    event = Suppress(
        Keyword(EVENT)
        - word
        - colon
        - positive_integer
        - lb
        - number
        - pipe
        - number
        - rb
        - QuotedString('"', multiline=True)
        - number
        - number
        - word
        - node
        - scolon)
    event.setName(EVENT)

    # CM_ comments may target a message, signal, node, event, or the file.
    comment = Group(
        Keyword(COMMENT)
        - ((Keyword(MESSAGE)
            - frame_id
            - QuotedString('"', multiline=True)
            - scolon).setName(MESSAGE)
           | (Keyword(SIGNAL)
              - frame_id
              - word
              - QuotedString('"', multiline=True)
              - scolon).setName(SIGNAL)
           | (Keyword(NODES)
              - word
              - QuotedString('"', multiline=True)
              - scolon).setName(NODES)
           | (Keyword(EVENT)
              - word
              - QuotedString('"', multiline=True)
              - scolon).setName(EVENT)
           | (QuotedString('"', multiline=True) - scolon).setName('QuotedString')))
    comment.setName(COMMENT)

    # BA_DEF_: attribute definitions, with optional enum values or ranges.
    attribute_definition = Group(
        Keyword(ATTRIBUTE_DEFINITION)
        - ((QuotedString('"', multiline=True))
           | (Keyword(SIGNAL)
              | Keyword(MESSAGE)
              | Keyword(EVENT)
              | Keyword(NODES))
           + QuotedString('"', multiline=True))
        - word
        - (scolon
           | (Group(ZeroOrMore(Group(
               (comma | Empty()) + QuotedString('"', multiline=True)))) + scolon)
           | (Group(ZeroOrMore(number)) + scolon)))
    attribute_definition.setName(ATTRIBUTE_DEFINITION)

    attribute_definition_default = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT)
        - QuotedString('"', multiline=True)
        - (number | QuotedString('"', multiline=True))
        - scolon)
    attribute_definition_default.setName(ATTRIBUTE_DEFINITION_DEFAULT)

    # BA_: attribute value, optionally scoped to a message/signal/node.
    attribute = Group(
        Keyword(ATTRIBUTE)
        - QuotedString('"', multiline=True)
        - Group(Optional((Keyword(MESSAGE) + frame_id)
                         | (Keyword(SIGNAL) + frame_id + word)
                         | (Keyword(NODES) + word)))
        - (QuotedString('"', multiline=True) | number)
        - scolon)
    attribute.setName(ATTRIBUTE)

    # VAL_: enumerated value descriptions for a signal.
    choice = Group(
        Keyword(CHOICE)
        - Group(Optional(frame_id))
        - word
        - Group(OneOrMore(Group(integer + QuotedString('"', multiline=True))))
        - scolon)
    choice.setName(CHOICE)

    value_table = Group(
        Keyword(VALUE_TABLE)
        - word
        - Group(OneOrMore(Group(integer + QuotedString('"', multiline=True))))
        - scolon)
    value_table.setName(VALUE_TABLE)

    signal_type = Group(
        Keyword(SIGNAL_TYPE)
        - frame_id
        - word
        - colon
        - positive_integer
        - scolon)
    signal_type.setName(SIGNAL_TYPE)

    signal_multiplexer_values = Group(
        Keyword(SIGNAL_MULTIPLEXER_VALUES)
        - frame_id
        - word
        - word
        - Group(delimitedList(positive_integer
                              - Suppress('-')
                              - Suppress(positive_integer)))
        - scolon)
    signal_multiplexer_values.setName(SIGNAL_MULTIPLEXER_VALUES)

    message_add_sender = Group(
        Keyword(MESSAGE_TX_NODE)
        - frame_id
        - colon
        - Group(delimitedList(node))
        - scolon)
    message_add_sender.setName(MESSAGE_TX_NODE)

    # Relational (BU_/SG_) attribute definitions and values.
    attribute_definition_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_REL)
        - (QuotedString('"', multiline=True)
           | (Keyword(NODES_REL) + QuotedString('"', multiline=True)))
        - word
        - (scolon
           | (Group(ZeroOrMore(Group(
               (comma | Empty()) + QuotedString('"', multiline=True)))) + scolon)
           | (Group(ZeroOrMore(number)) + scolon)))
    attribute_definition_rel.setName(ATTRIBUTE_DEFINITION_REL)

    attribute_definition_default_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT_REL)
        - QuotedString('"', multiline=True)
        - (number | QuotedString('"', multiline=True))
        - scolon)
    attribute_definition_default_rel.setName(ATTRIBUTE_DEFINITION_DEFAULT_REL)

    attribute_rel = Group(
        Keyword(ATTRIBUTE_REL)
        - QuotedString('"', multiline=True)
        - Keyword(NODES_REL)
        - word
        - Keyword(SIGNAL)
        - frame_id
        - word
        - positive_integer
        - scolon)
    attribute_rel.setName(ATTRIBUTE_REL)

    entry = (version
             | symbols
             | discard
             | nodes
             | message
             | comment
             | attribute_definition
             | attribute_definition_default
             | attribute
             | choice
             | value_table
             | signal_type
             | signal_multiplexer_values
             | message_add_sender
             | attribute_definition_rel
             | attribute_definition_default_rel
             | attribute_rel
             | event)

    # Convert every parsed frame id to int during parsing.
    frame_id.setParseAction(lambda _s, _l, t: int(t[0]))

    return OneOrMore(entry) + StringEnd()
# Boolean-expression grammar (EBNF):
#   <and>        ::= 'and'
#   <not>        ::= 'not'
#   <expression> ::= <term> { <or><term> }
#   <term>       ::= <factor> { <and><factor> }
#   <factor>     ::= <constant> | <not><factor> | (<expression>)
l_par, r_par = Suppress('('), Suppress(')')
and_op = Keyword('and')
or_op = Keyword('or')
not_op = Keyword('not')
# Only the propositional variables p, q and r are recognised.
variable = Word('pqr', exact=1)
constant = Keyword('True') | Keyword('False')

expr = Forward()
factor = Forward()
factor <<= constant | variable | Group(not_op + factor) | Group(l_par + expr + r_par)
term = factor + ZeroOrMore(and_op + factor)
# BUG FIX: the operand repeated after 'or' must be a full <term>, not a bare
# <factor>, per the grammar above — otherwise an input such as
# "p or q and r" leaves the trailing "and r" unparsed.
expr <<= term + ZeroOrMore(or_op + term)

test_strings = [
    'True',
    'not True',
    'p',
    'q and r',
    '(q and r)',
    '(False)',
    '(p and True)',
    '(p and q) or p',
    '(p and (not q or r)) or p',
]
def __init__(self):
    """Build the natural-language reminder parser.

    The grammar combines free reminder text with an optional recurrence
    clause ("every ...") and an optional delay clause ("in ..."), in any
    order, e.g. "in 5 min remind me to stretch every 2 hours".  The
    assembled parser is stored in ``self.parser``.
    """
    # Packrat memoisation: the template below tries many orderings of the
    # same sub-expressions, so caching avoids re-parsing.
    ParserElement.enablePackrat()

    # One quantity per unit: an integer (converted to int by the parse
    # action) followed by any accepted spelling of the unit name.
    unit_years = (CaselessLiteral("years") | CaselessLiteral("year")
                  | CaselessLiteral("y"))
    years = (
        Word(nums).setParseAction(lambda s, l, t: [int(t[0])])("years") +
        unit_years)
    unit_months = (CaselessLiteral("months") | CaselessLiteral("month")
                   | CaselessLiteral("mo"))
    months = (
        Word(nums).setParseAction(lambda s, l, t: [int(t[0])])("months") +
        unit_months)
    unit_weeks = (CaselessLiteral("weeks") | CaselessLiteral("week")
                  | CaselessLiteral("w"))
    weeks = (
        Word(nums).setParseAction(lambda s, l, t: [int(t[0])])("weeks") +
        unit_weeks)
    unit_days = (CaselessLiteral("days") | CaselessLiteral("day")
                 | CaselessLiteral("d"))
    days = (
        Word(nums).setParseAction(lambda s, l, t: [int(t[0])])("days") +
        unit_days)
    unit_hours = (CaselessLiteral("hours") | CaselessLiteral("hour")
                  | CaselessLiteral("hrs") | CaselessLiteral("hr")
                  | CaselessLiteral("h"))
    hours = (
        Word(nums).setParseAction(lambda s, l, t: [int(t[0])])("hours") +
        unit_hours)
    unit_minutes = (CaselessLiteral("minutes") | CaselessLiteral("minute")
                    | CaselessLiteral("mins") | CaselessLiteral("min")
                    | CaselessLiteral("m"))
    minutes = (
        Word(nums).setParseAction(lambda s, l, t: [int(t[0])])("minutes") +
        unit_minutes)
    unit_seconds = (CaselessLiteral("seconds") | CaselessLiteral("second")
                    | CaselessLiteral("secs") | CaselessLiteral("sec")
                    | CaselessLiteral("s"))
    seconds = (
        Word(nums).setParseAction(lambda s, l, t: [int(t[0])])("seconds") +
        unit_seconds)

    time_unit = years | months | weeks | days | hours | minutes | seconds
    # Units may be separated by "," and/or "and": "1 h, 2 min and 3 s".
    time_unit_separators = Optional(Literal(",")) + Optional(
        CaselessLiteral("and"))
    full_time = time_unit + ZeroOrMore(
        Suppress(Optional(time_unit_separators)) + time_unit)

    # Recurrence and delay clauses; "in" is optional only when the clause
    # position makes the meaning unambiguous.
    every_time = Group(CaselessLiteral("every") + full_time)("every")
    in_opt_time = Group(Optional(CaselessLiteral("in")) + full_time)("in")
    in_req_time = Group(CaselessLiteral("in") + full_time)("in")

    # Reminder text is everything up to the next time clause (or the end),
    # stripped of surrounding whitespace; a leading "to" is dropped.
    reminder_text_capture = SkipTo(every_time | in_req_time
                                   | StringEnd()).setParseAction(
        tokenMap(str.strip))
    reminder_text_optional_prefix = Optional(
        Suppress(CaselessLiteral("to")))
    reminder_text = reminder_text_optional_prefix + reminder_text_capture(
        "text")

    # All accepted orderings of text / "in" clause / "every" clause.
    in_every_text = in_opt_time + every_time + reminder_text
    every_in_text = every_time + in_req_time + reminder_text
    in_text_every = in_opt_time + reminder_text + every_time
    every_text_in = every_time + reminder_text + in_req_time
    text_in_every = reminder_text + in_req_time + every_time
    text_every_in = reminder_text + every_time + in_req_time
    in_text = in_opt_time + reminder_text
    text_in = reminder_text + in_req_time
    every_text = every_time + reminder_text
    text_every = reminder_text + every_time
    template = (in_every_text | every_in_text | in_text_every
                | every_text_in | text_in_every | text_every_in
                | in_text | text_in | every_text | text_every)
    self.parser = template
class RawNginxParser(object):
    # pylint: disable=expression-not-assigned
    """A class that parses nginx configuration with pyparsing."""

    # constants
    space = Optional(White())
    nonspace = Regex(r"\S+")
    left_bracket = Literal("{").suppress()
    # leaveWhitespace keeps the whitespace before '}' out of the value
    # that precedes it.
    right_bracket = space.leaveWhitespace() + Literal("}").suppress()
    semicolon = Literal(";").suppress()
    key = Word(alphanums + "_/+-.")
    dollar_var = Combine(Literal('$') + Regex(r"[^\{\};,\s]+"))
    condition = Regex(r"\(.+\)")
    # Matches anything that is not a special character, and ${SHELL_VARS}, AND
    # any chars in single or double quotes
    # All of these COULD be upgraded to something like
    # https://stackoverflow.com/a/16130746
    dquoted = QuotedString('"', multiline=True, unquoteResults=False)
    squoted = QuotedString("'", multiline=True, unquoteResults=False)
    nonspecial = Regex(r"[^\{\};,]")
    varsub = Regex(r"(\$\{\w+\})")
    # nonspecial nibbles one character at a time, but the other objects take
    # precedence.  We use ZeroOrMore to allow entries like "break ;" to be
    # parsed as assignments
    value = Combine(ZeroOrMore(dquoted | squoted | varsub | nonspecial))
    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    comment = space + Literal('#') + restOfLine
    assignment = space + key + Optional(space + value, default=None) + semicolon
    location_statement = space + Optional(modifier) + Optional(space + location + space)
    if_statement = space + Literal("if") + space + condition + space
    charset_map_statement = space + Literal(
        "charset_map") + space + value + space + value
    map_statement = space + Literal(
        "map") + space + nonspace + space + dollar_var + space
    # This is NOT an accurate way to parse nginx map entries; it's almost
    # certainly too permissive and may be wrong in other ways, but it should
    # preserve things correctly in mmmmost or all cases.
    #
    # - I can neither prove nor disprove that it is correct wrt all escaped
    #   semicolon situations
    # Addresses https://github.com/fatiherikli/nginxparser/issues/19
    map_pattern = Regex(r'".*"') | Regex(r"'.*'") | nonspace
    map_entry = space + map_pattern + space + value + space + semicolon
    map_block = Group(
        Group(map_statement).leaveWhitespace() +
        left_bracket +
        Group(ZeroOrMore(Group(comment | map_entry)) +
              space).leaveWhitespace() + right_bracket)
    block = Forward()
    # key could for instance be "server" or "http", or "location" (in which case
    # location_statement needs to have a non-empty location)
    block_begin = (Group(space + key + location_statement) ^
                   Group(if_statement) ^
                   Group(charset_map_statement)).leaveWhitespace()
    block_innards = Group(
        ZeroOrMore(Group(comment | assignment) | block | map_block) +
        space).leaveWhitespace()
    block << Group(block_begin + left_bracket + block_innards + right_bracket)
    script = OneOrMore(Group(comment | assignment) ^
                       block ^ map_block) + space + stringEnd
    script.parseWithTabs().leaveWhitespace()

    def __init__(self, source):
        # Raw configuration text to parse.
        self.source = source

    def parse(self):
        """Returns the parsed tree."""
        return self.script.parseString(self.source)

    def as_list(self):
        """Returns the parsed tree as a list."""
        return self.parse().asList()
def script(self):
    """Assemble and return the pyparsing grammar for an nginx config dump.

    The returned element matches one or more statements/blocks (directives,
    include lines, if/location/hash/generic blocks, comments, and
    "# configuration file <path>:" delimiters emitted by `nginx -T`).
    """
    # constants
    left_bracket = Suppress("{")
    right_bracket = Suppress("}")
    semicolon = Suppress(";")
    space = White().suppress()
    keyword = Word(alphanums + ".+-_/")
    path = Word(alphanums + ".-_/")
    variable = Word("$_-" + alphanums)
    # Unquoted value: parenthesised groups, ${vars}, or plain runs without
    # whitespace / structural characters.
    value_wq = Regex(r'(?:\([^\s;]*\)|\$\{\w+\}|[^\s;(){}])+')
    value_sq = NginxQuotedString(quoteChar="'")
    value_dq = NginxQuotedString(quoteChar='"')
    value = (value_dq | value_sq | value_wq)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    location_modifier = (Keyword("=") | Keyword("~*") | Keyword("~") |
                         Keyword("^~"))
    # modifier for if statement: optional '!' plus comparison or file test.
    if_modifier = Combine(
        Optional("!") +
        (Keyword("=") | Keyword("~*") | Keyword("~") |
         (Literal("-") + (Literal("f") | Literal("d") | Literal("e") |
                          Literal("x")))))
    condition = ((if_modifier + Optional(space) + value) |
                 (variable + Optional(space + if_modifier + Optional(space) +
                                      value)))

    # rules
    include = (Keyword("include") + space + value + semicolon)("include")
    directive = (keyword + ZeroOrMore(space + value) +
                 semicolon)("directive")
    file_delimiter = (Suppress("# configuration file ") + path +
                      Suppress(":"))("file_delimiter")
    comment = (Suppress('#') + Regex(r".*"))("comment")
    hash_value = Group(value + ZeroOrMore(space + value) +
                       semicolon)("hash_value")

    # Block kinds are mutually recursive with sub_block, hence Forwards.
    generic_block = Forward()
    if_block = Forward()
    location_block = Forward()
    hash_block = Forward()
    unparsed_block = Forward()
    sub_block = OneOrMore(
        Group(if_block | location_block | hash_block | generic_block |
              include | directive | file_delimiter | comment |
              unparsed_block))
    if_block << (
        Keyword("if") + Suppress("(") + Group(condition) + Suppress(")") +
        Group(left_bracket + Optional(sub_block) + right_bracket))("block")
    location_block << (Keyword("location") + Group(
        Optional(space + location_modifier) + Optional(space) + value) +
        Group(left_bracket + Optional(sub_block) + right_bracket))("block")
    hash_block << (keyword + Group(OneOrMore(space + variable)) +
                   Group(left_bracket + Optional(OneOrMore(hash_value)) +
                         right_bracket))("block")
    generic_block << (
        keyword + Group(ZeroOrMore(space + variable)) +
        Group(left_bracket + Optional(sub_block) + right_bracket))("block")
    # Fallback: keep the raw nested braces of anything we cannot parse.
    unparsed_block << (
        keyword + Group(ZeroOrMore(space + variable)) +
        nestedExpr(opener="{", closer="}"))("unparsed_block")
    return sub_block
def _make_arabic_parser():
    """Build the query-language parser for Arabic/Latin search queries.

    Returns the ``parseString`` bound method of the assembled top-level
    grammar.  The grammar supports words, wildcards, ranges, synonyms (~),
    antonyms (#), derivation (< >), spelling errors (%), tashkil ('...'),
    tuples ({...}), field prefixes (field:), boosting (^n) and the
    AND/OR/ANDNOT connectives in both Arabic and English.
    """
    escapechar = "//"
    # wordchars = printables
    # for specialchar in '*?^():"{}[] ' + escapechar:
    # wordchars = wordchars.replace(specialchar, "")
    # wordtext = Word(wordchars)
    # Arabic + Latin letters used for field-name words below.
    alephba = """ abcdefghijklmnopqrstuvwxyz_ األآإـتنمكطدجحخهعغفقثصضشسيبئءؤرىةوزظذ """
    # A word is any run of characters excluding the query metacharacters.
    wordtext = CharsNotIn('//*؟^():"{}[]$><%~#،,\' +-|')
    # "//x" escapes any single printable or whitespace character.
    escape = Suppress( escapechar ) \
        + ( Word( printables, exact = 1 ) | White( exact = 1 ) )
    wordtoken = Combine(OneOrMore(wordtext | escape))

    # A plain old word.
    plainWord = Group(wordtoken).setResultsName("Word")

    # A wildcard word containing * or ? (Arabic ؟ also accepted).
    wildchars = Word("؟?*")
    # Start with word chars and then have wild chars mixed in
    wildmixed = wordtoken + OneOrMore(wildchars + Optional(wordtoken))
    # Or, start with wildchars, and then either a mixture of word and wild chars
    # , or the next token
    wildstart = wildchars \
        + ( OneOrMore( wordtoken + Optional( wildchars ) ) \
            | FollowedBy( White() \
                          | StringEnd() ) )
    wildcard = Group(Combine(wildmixed | wildstart)).setResultsName("Wildcard")

    # A range of terms: [a الى b], with either end optionally open.
    startfence = Literal("[")
    endfence = Literal("]")
    rangeitem = QuotedString('"') | wordtoken
    to = Keyword( "الى" ) \
        | Keyword( "إلى" ) \
        | Keyword( "To" ) \
        | Keyword( "to" ) \
        | Keyword( "TO" )
    openstartrange = Group( Empty() ) \
        + Suppress( to + White() ) \
        + Group( rangeitem )
    openendrange = Group( rangeitem ) \
        + Suppress( White() + to ) \
        + Group( Empty() )
    normalrange = Group( rangeitem ) \
        + Suppress( White() + to + White() ) \
        + Group( rangeitem )
    # NOTE(review): this local shadows the builtin `range` (and `tuple`
    # below shadows its builtin) — harmless within this function.
    range = Group( startfence \
                   + ( normalrange | openstartrange | openendrange ) \
                   + endfence ).setResultsName( "Range" )

    # synonyms
    syn_symbol = Literal("~")
    synonym = Group(syn_symbol + wordtoken).setResultsName("Synonyms")

    # antonyms
    ant_symbol = Literal("#")
    antonym = Group(ant_symbol + wordtoken).setResultsName("Antonyms")

    # derivation level 1,2
    derive_symbole = Literal("<") | Literal(">")
    derivation = Group(OneOrMore(derive_symbole) + wordtoken).setResultsName("Derivation")

    # spellerrors
    # spellerrors=Group(QuotedString('\'')).setResultsName("Errors")
    spellerrors_symbole = Literal("%")
    spellerrors = Group(spellerrors_symbole + wordtoken).setResultsName("SpellErrors")

    # shakl:must uplevel to boostable
    tashkil_symbol = Literal("'")
    tashkil = Group( tashkil_symbol + \
                     ZeroOrMore( wordtoken | White() ) + \
                     tashkil_symbol ).setResultsName( "Tashkil" )

    # tuple search (root,pattern,type); Arabic or Latin comma separators.
    starttuple = Literal("{")
    endtuple = Literal("}")
    bettuple = Literal("،") | Literal(",")
    wordtuple = Group(Optional(wordtoken))
    tuple = Group( starttuple + \
                   wordtuple + \
                   ZeroOrMore( bettuple + wordtuple ) + \
                   endtuple ).setResultsName( "Tuple" )

    # A word-like thing
    generalWord = range | wildcard | plainWord | tuple | antonym | synonym | \
        derivation | tashkil | spellerrors

    # A quoted phrase
    quotedPhrase = Group(QuotedString('"')).setResultsName("Quotes")

    expression = Forward()

    # Parentheses can enclose (group) any expression
    parenthetical = Group(
        (Suppress("(") + expression + Suppress(")"))).setResultsName("Group")

    boostableUnit = generalWord | quotedPhrase
    boostedUnit = Group( boostableUnit + \
                         Suppress( "^" ) + \
                         Word( "0123456789", ".0123456789" ) ).setResultsName( "Boost" )

    # The user can flag that a parenthetical group, quoted phrase, or word
    # should be searched in a particular field by prepending 'fn:', where fn is
    # the name of the field.
    fieldableUnit = parenthetical | boostedUnit | boostableUnit
    fieldedUnit = Group( ( Word( alephba + "_" ) | Word( alphanums + "_" ) ) + \
                         Suppress( ':' ) + \
                         fieldableUnit ).setResultsName( "Field" )

    # Units of content
    unit = fieldedUnit | fieldableUnit

    # A unit may be "not"-ed.
    operatorNot = Group( Suppress( Keyword( "ليس" ) | Keyword( "NOT" ) ) + \
                         Suppress( White() ) + \
                         unit ).setResultsName( "Not" )
    generalUnit = operatorNot | unit

    # Connective keywords, Arabic or English; '+', '|' and '-' are the
    # symbolic spellings of AND, OR and ANDNOT respectively.
    andToken = Keyword("و") | Keyword("AND")
    orToken = Keyword("أو") | Keyword("او") | Keyword("OR")
    andNotToken = Keyword("وليس") | Keyword("ANDNOT")
    operatorAnd = Group( ( generalUnit + \
                           Suppress( White() ) + \
                           Suppress( andToken ) + \
                           Suppress( White() ) + \
                           expression ) | \
                         ( generalUnit + \
                           Suppress( Literal( "+" ) ) + \
                           expression ) ).setResultsName( "And" )
    operatorOr = Group( ( generalUnit + \
                          Suppress( White() ) + \
                          Suppress( orToken ) + \
                          Suppress( White() ) + \
                          expression ) | \
                        ( generalUnit + \
                          Suppress( Literal( "|" ) ) + \
                          expression ) ).setResultsName( "Or" )
    operatorAndNot = Group( ( unit + \
                              Suppress( White() ) + \
                              Suppress( andNotToken ) + \
                              Suppress( White() ) + \
                              expression ) | \
                            ( unit + \
                              Suppress( Literal( "-" ) ) + \
                              expression ) ).setResultsName( "AndNot" )

    expression << ( OneOrMore( operatorAnd | operatorOr | operatorAndNot | \
                               generalUnit | Suppress( White() ) ) | Empty() )

    toplevel = Group(expression).setResultsName("Toplevel") + StringEnd()

    return toplevel.parseString
def _create_parser(self):
    """Build the pyparsing grammar for CDSL component description files.

    The grammar covers IDSL imports, the communications block
    (implements/requires/subscribesTo/publishes in any order), language,
    optional GUI, options, InnerModelViewer and statemachine sections,
    all wrapped in a single "component <name> { ... };" definition.
    C++-style comments are ignored.
    """
    OBRACE, CBRACE, SEMI, OPAR, CPAR = list(map(Suppress, "{};()"))
    QUOTE = Suppress(Word("\""))

    # keywords
    (IMPORT, COMMUNICATIONS, LANGUAGE, COMPONENT, CPP, CPP11, GUI, QWIDGET,
     QMAINWINDOW, QDIALOG, QT, PYTHON, REQUIRES, IMPLEMENTS, SUBSCRIBESTO,
     PUBLISHES, OPTIONS, TRUE, FALSE, INNERMODELVIEWER, STATEMACHINE,
     VISUAL) = list(
        map(
            CaselessKeyword, """ import communications language component cpp cpp11 gui QWidget QMainWindow QDialog Qt python requires implements subscribesTo publishes options true false InnerModelViewer statemachine visual""".split()))

    identifier = Word(alphas + "_", alphanums + "_")
    PATH = CharsNotIn("\";")

    # Imports: import "path/to/file.idsl";
    idslImport = Group(
        Suppress(IMPORT) - QUOTE + PATH.setResultsName('idsl_path') - QUOTE +
        SEMI)
    idslImports = ZeroOrMore(idslImport).setResultsName("imports")

    # Optional middleware annotation after an interface name, e.g. "(ice)".
    commType = Optional(
        OPAR - (CaselessKeyword("ice") | CaselessKeyword("ros")
                | CaselessKeyword("ros2")).setResultsName("type") + CPAR)
    implementsList = Optional(IMPLEMENTS - Group(
        delimitedList(
            Group(identifier.setResultsName("impIdentifier") +
                  commType))).setResultsName("implements") + SEMI)
    requiresList = Optional(REQUIRES - Group(
        delimitedList(
            Group(identifier.setResultsName("reqIdentifier") +
                  commType))).setResultsName("requires") + SEMI)
    subscribesList = Optional(SUBSCRIBESTO - Group(
        delimitedList(
            Group(identifier.setResultsName("subIdentifier") +
                  commType))).setResultsName("subscribesTo") + SEMI)
    publishesList = Optional(PUBLISHES - Group(
        delimitedList(
            Group(identifier.setResultsName("pubIdentifier") +
                  commType))).setResultsName("publishes") + SEMI)
    # '&' (Each) allows the four clauses to appear in any order.
    communicationList = Group(
        implementsList & requiresList & subscribesList &
        publishesList).setResultsName("communications")
    communications = COMMUNICATIONS.suppress(
    ) - OBRACE + communicationList + CBRACE + SEMI

    # Language
    language_options = (CPP | CPP11 | PYTHON).setResultsName('language')
    language = LANGUAGE.suppress() - language_options - SEMI

    # InnerModelViewer
    innermodelviewer = Group(
        Optional(INNERMODELVIEWER.suppress() - (TRUE | FALSE) +
                 SEMI))('innermodelviewer')

    # GUI: gui Qt(QWidget|QMainWindow|QDialog);
    gui_options = QWIDGET | QMAINWINDOW | QDIALOG
    gui = Group(
        Optional(GUI.suppress() - QT + OPAR - gui_options('gui_options') -
                 CPAR + SEMI))

    # additional options: comma-separated identifiers.
    options = Group(
        Optional(OPTIONS.suppress() - identifier +
                 ZeroOrMore(Suppress(Word(',')) + identifier) + SEMI))
    # statemachine "path" [visual];  — the parse action records the
    # presence of the "visual" flag as True.
    statemachine = Group(
        Optional(STATEMACHINE.suppress() - QUOTE +
                 CharsNotIn("\";").setResultsName('machine_path') + QUOTE +
                 Optional(
                     VISUAL.setResultsName('visual').setParseAction(
                         lambda t: True)) + SEMI))

    # Component definition
    componentContents = Group(
        communications - language + Optional(gui('gui')) +
        Optional(options('options')) + Optional(innermodelviewer) +
        Optional(statemachine('statemachine'))).setResultsName("content")
    component = Group(COMPONENT.suppress() - identifier("name") + OBRACE +
                      componentContents + CBRACE +
                      SEMI).setResultsName("component")

    CDSL = idslImports - component
    CDSL.ignore(cppStyleComment)
    return CDSL
# XXX: we are cheating a lot here as there is also long bracket form... and some other differences... literal_string = (QuotedString("'", "\\") | QuotedString( '"', "\\")).setParseAction(lambda t, p, v: ast.LiteralString(*v)) exp = Forward() explist = Forward() tableconstructor = Forward() # var ::= Name var = name.copy().setParseAction(from_parse_result(ast.Var)) # There is additional (":" + name) prefix which is moved here from # functioncall definition # args ::= ‘(’ [explist] ‘)’ | tableconstructor | LiteralString args = (Optional(Suppress(':') + var, default=None).setResultsName('method') + ((Suppress('(') + Optional(explist, default=[]) + Suppress(')')) | tableconstructor | Group(literal_string)).setResultsName('args')) def function_or_method_call(parts): fun, method, args = parts[0], parts[1], parts[2] if method is None: return ast.FunctionCall(fun, list(args)) else: return ast.MethodCall(fun, method, list(args)) # I've made a split of cases to avoid infinite recurrsion # functioncall ::= prefixexp args | prefixexp ‘:’ Name args functioncall_simple = ( (var | (Suppress('(') + exp + Suppress(')'))).setResultsName('fun') + args).setParseAction(function_or_method_call)
def _create_dbc_grammar():
    """Create DBC grammar.

    Builds a pyparsing grammar for the DBC (CAN database) sections
    handled here: VERSION, NS_, BS_, BU_, messages/signals, events,
    comments, attribute definitions and defaults, attribute values,
    value choices and value tables.  Returns the combined parser
    anchored to the end of the input.
    """
    # ';' and ':' are structural characters in DBC, so a generic word
    # must not swallow them.
    word = Word(printables.replace(';', '').replace(':', ''))
    integer = Group(Optional('-') + Word(nums))
    positive_integer = Word(nums)
    number = Word(nums + '.Ee-+')
    colon = Suppress(Literal(':'))
    scolon = Suppress(Literal(';'))
    pipe = Suppress(Literal('|'))
    at = Suppress(Literal('@'))
    sign = Literal('+') | Literal('-')
    lp = Suppress(Literal('('))
    rp = Suppress(Literal(')'))
    lb = Suppress(Literal('['))
    rb = Suppress(Literal(']'))
    comma = Suppress(Literal(','))
    # Node names sit on a single line; blanks-only whitespace keeps the
    # node list from running across line ends.
    node = Word(alphas + nums + '_-').setWhitespaceChars(' ')

    version = Group(Keyword('VERSION') + QuotedString('"', multiline=True))
    symbol = Word(alphas + '_') + Suppress(LineEnd())
    symbols = Group(Keyword('NS_') + colon + Group(ZeroOrMore(symbol)))
    # BS_ (bit timing) content is ignored.
    discard = Suppress(Keyword('BS_') + colon)
    nodes = Group(Keyword('BU_') + colon + Group(ZeroOrMore(node)))
    # Signal: name [mux] : start|length@byteorder sign (factor,offset)
    # [min|max] "unit" receiver-list
    signal = Group(
        Keyword(SIGNAL) + Group(word + Optional(word)) + colon +
        Group(positive_integer + pipe + positive_integer + at +
              positive_integer + sign) +
        Group(lp + number + comma + number + rp) +
        Group(lb + number + pipe + number + rb) +
        QuotedString('"', multiline=True) + Group(delimitedList(node)))
    # Message: id name : size sender, followed by its signals.
    message = Group(
        Keyword(MESSAGE) + positive_integer + word + colon +
        positive_integer + word + Group(ZeroOrMore(signal)))
    # Events are recognized but discarded.
    event = Suppress(
        Keyword(EVENT) + word + colon + positive_integer + lb + number +
        pipe + number + rb + QuotedString('"', multiline=True) + number +
        number + word + node + scolon)
    # Comments may target a message, a signal, a node or an event.
    comment = Group(
        Keyword(COMMENT) +
        ((Keyword(MESSAGE) + positive_integer +
          QuotedString('"', multiline=True) + scolon)
         | (Keyword(SIGNAL) + positive_integer + word +
            QuotedString('"', multiline=True) + scolon)
         | (Keyword(NODES) + word +
            QuotedString('"', multiline=True) + scolon)
         | (Keyword(EVENT) + word +
            QuotedString('"', multiline=True) + scolon)))
    # Attribute definition: optional object-type qualifier, attribute
    # name, then either nothing, an enum value list, or a number range.
    attribute_definition = Group(
        Keyword(ATTRIBUTE_DEFINITION) +
        ((QuotedString('"', multiline=True))
         | (Keyword(SIGNAL) | Keyword(MESSAGE) | Keyword(EVENT) |
            Keyword(NODES)) + QuotedString('"', multiline=True)) +
        word +
        (scolon
         | (Group(ZeroOrMore(Group(
               (comma | Empty()) + QuotedString('"', multiline=True)))) +
            scolon)
         | (Group(ZeroOrMore(number)) + scolon)))
    attribute_definition_default = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT) +
        QuotedString('"', multiline=True) +
        (positive_integer | QuotedString('"', multiline=True)) + scolon)
    # Attribute value, optionally scoped to a message, signal or node.
    attribute = Group(
        Keyword(ATTRIBUTE) + QuotedString('"', multiline=True) +
        Group(Optional((Keyword(MESSAGE) + positive_integer)
                       | (Keyword(SIGNAL) + positive_integer + word)
                       | (Keyword(NODES) + word))) +
        (QuotedString('"', multiline=True) | positive_integer) + scolon)
    # Value choices: (value, "text") pairs attached to a signal.
    choice = Group(
        Keyword(CHOICE) + Optional(positive_integer) + word +
        Group(OneOrMore(Group(
            integer + QuotedString('"', multiline=True)))) + scolon)
    value_table = Group(
        Keyword(VALUE_TABLE) + word +
        Group(OneOrMore(Group(
            integer + QuotedString('"', multiline=True)))) + scolon)

    # Any section may appear, in any order, any number of times.
    entry = (version
             | symbols
             | discard
             | nodes
             | message
             | comment
             | attribute_definition
             | attribute_definition_default
             | attribute
             | choice
             | value_table
             | event)

    return OneOrMore(entry) + StringEnd()
def __init__(self, n, colors, shapes, max_constant=5): """ :param n: length of side of the grids :param colors: list of color names :param shapes: list of shape names """ self.colors = Or([Keyword(w) for w in colors]) self.colors ^= Keyword("getMarkerColor()") self.shapes = Or([Keyword(w) for w in shapes]) self.shapes ^= Keyword("getMarkerShape()") self.positions = Or([Keyword(str(i)) for i in range(n)]) self.constants = Or([Keyword(str(i)) for i in range(1, max_constant)]) self.actions = (("move(" + self.positions + "," + self.positions + ")") | "moveUp()" | "moveDown()" | "moveLeft()" | "moveRight()" | "moveTop()" | "moveBottom()" | "moveLeftmost()" | "moveRightmost()" | "moveToMovableMarker()" | "pickMarker()" | "putMarker()" | "fixMarker()") self.conditions = (Group(self.shapes + "==" + self.shapes) | Group(self.colors + "==" + self.colors) | "markersPresent()" | "movableMarkersPresent()" | "existMovableMarkers()" | "upperBoundary()" | "lowerBoundary()" | "leftBoundary()" | "rightBoundary()" | "true") self.conditions = (self.conditions | Group(Keyword("not") + self.conditions)) block = Forward() stmt = (Group( Keyword("while") + "(" + self.conditions + ")" + "{" + Group(block) + "}") | Group( Keyword("repeat") + "(" + self.constants + ")" + "{" + Group(block) + "}") | Group( Keyword("if") + "(" + self.conditions + ")" + "{" + Group(block) + "}") | Group( Keyword("ifelse") + "(" + self.conditions + ")" + "{" + Group(block) + "}" + Keyword("else") + "{" + Group(block) + "}") | Group(self.actions + ";")) block << OneOrMore(stmt) # stmt ^= block self.statements = block self.program = Keyword("def") + Keyword("run()") + "{" \ + Group(self.statements) + "}"
kwds = """message required optional repeated enum extensions extends extend to package service rpc returns true false option import""" for kw in kwds.split(): exec("{}_ = Keyword('{}')".format(kw.upper(), kw)) messageBody = Forward() messageDefn = MESSAGE_ - ident("messageId") + LBRACE + messageBody( "body") + RBRACE typespec = oneOf("""double float int32 int64 uint32 uint64 sint32 sint64 fixed32 fixed64 sfixed32 sfixed64 bool string bytes""" ) | ident rvalue = integer | TRUE_ | FALSE_ | ident fieldDirective = LBRACK + Group(ident + EQ + rvalue) + RBRACK fieldDefn = ((REQUIRED_ | OPTIONAL_ | REPEATED_)("fieldQualifier") - typespec("typespec") + ident("ident") + EQ + integer("fieldint") + ZeroOrMore(fieldDirective) + SEMI) # enumDefn ::= 'enum' ident '{' { ident '=' integer ';' }* '}' enumDefn = ENUM_("typespec") - ident('name') + LBRACE + Dict( ZeroOrMore(Group(ident + EQ + integer + SEMI)))('values') + RBRACE # extensionsDefn ::= 'extensions' integer 'to' integer ';' extensionsDefn = EXTENSIONS_ - integer + TO_ + integer + SEMI # messageExtension ::= 'extend' ident '{' messageBody '}' messageExtension = EXTEND_ - ident + LBRACE + messageBody + RBRACE # messageBody ::= { fieldDefn | enumDefn | messageDefn | extensionsDefn | messageExtension }*
from mapmaker.flatmap.feature import Feature, FeatureMap from mapmaker.flatmap.layers import FeatureLayer from mapmaker.knowledgebase import get_knowledge from mapmaker.sources.markup import ID_TEXT from mapmaker.utils import log, FilePath #=============================================================================== NERVES = delimitedList(ID_TEXT) LINE_ID = ID_TEXT PATH_LINES = delimitedList(LINE_ID) NODE_ID = ID_TEXT ROUTE_NODE_GROUP = NODE_ID | Group( Suppress('(') + delimitedList(NODE_ID) + Suppress(')')) ROUTE_NODES = delimitedList(ROUTE_NODE_GROUP) #=============================================================================== def parse_path_lines(line_ids): #============================== try: if isinstance(line_ids, str): path_lines = PATH_LINES.parseString(line_ids, parseAll=True) else: path_lines = [ LINE_ID.parseString(line_id)[0] for line_id in line_ids ] except ParseException:
# atomic_number ::= decimal atomic_number = decimal # column_name = 'Atomic Number' | 'Atomic Symbol' | 'Mass Number' | 'Relative Atomic Mass' | \ # 'Isotopic Composition' | 'Standard Atomic Weight' | 'Notes' column_name = oneOf(COLUMN_NAMES_MAPPING.keys()).setParseAction( lambda t: COLUMN_NAMES_MAPPING[t[0]]) # isotope ::= column_name eq atomic_number \ # column_name eq symbol \ # column_name eq mass_number \ # column_name eq atomic_mass \ # column_name eq [isotopic_comp] \ # column_name eq [atomic_weight] \ # column_name eq [notes] isotope = Dict( Group(column_name + EQ + atomic_number) ) + \ Dict( Group(column_name + EQ + symbol )) + \ Dict( Group(column_name + EQ + mass_number) ) + \ Dict( Group(column_name + EQ + atomic_mass) ) + \ Dict( Group(column_name + EQ + Optional(isotopic_comp)) ) + \ Dict( Group(column_name + EQ + Optional(atomic_weight)) ) + \ Dict( Group(column_name + EQ + Optional(notes)) ) def remove_empty_keys(tokens): for key, item in tokens.items(): if item == '': del tokens[key] return tokens
def __init__(self):
    """Build the arithmetic-expression parser and operator tables.

    Grammar (exponentiation is right-associative):
        expop   :: '**'
        multop  :: '*' | '/'
        addop   :: '+' | '-' | '^'   ('^' is xor, at additive precedence)
        integer :: ['+' | '-'] '0'..'9'+
        atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
        factor  :: atom [ expop factor ]*
        term    :: factor [ multop factor ]*
        expr    :: term [ addop term ]*
    """
    point = Literal(".")
    e = CaselessLiteral("E")
    # Signed real number with optional fraction and exponent parts.
    fnumber = Combine(
        Word("+-" + nums, nums) + Optional(point + Optional(Word(nums))) +
        Optional(e + Word("+-" + nums, nums)))
    ident = Word(alphas, alphas + nums + "_$")
    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("*")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    bitop = Literal("^")
    # '^' is treated as bitwise xor and binds like +/-; '**' is power.
    addop = plus | minus | bitop
    multop = mult | div
    expop = Literal("**")
    pi = CaselessLiteral("PI")
    expr = Forward()
    atom = ((Optional(oneOf("- +")) +
             (ident + lpar + expr + rpar | pi | e
              | fnumber).setParseAction(self.pushFirst))
            | Optional(oneOf("- +")) +
            Group(lpar + expr + rpar)).setParseAction(self.pushUMinus)
    # by defining exponentiation as "atom [ ^ factor ]..." instead of
    # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
    # left-to-right; that is, 2**3**2 = 2**(3**2), not (2**3)**2.
    factor = Forward()
    factor << atom + \
        ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
    term = factor + \
        ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
    expr << term + \
        ZeroOrMore((addop + term).setParseAction(self.pushFirst))
    self.bnf = expr
    # map operator symbols to corresponding arithmetic operations
    epsilon = 1e-12
    self.opn = {
        "+": operator.add,
        "-": operator.sub,
        "*": operator.mul,
        "/": operator.truediv,
        "**": operator.pow,
        "^": lambda a, b: int(a) ^ int(b)
    }
    self.fn = {
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "exp": math.exp,
        "sqrt": math.sqrt,
        "abs": abs,
        "trunc": lambda a: int(a),
        "round": round,
        # Sign of a with a +/-epsilon dead zone.  Rewritten without the
        # Python-2-only cmp() builtin (removed in Python 3); behavior
        # is identical: +1 if a > epsilon, -1 if a < -epsilon, else 0.
        "sgn": lambda a: (a > epsilon) - (a < -epsilon)
    }
def create_dbc_grammar(): """Create DBC grammar. """ # DBC file grammar word = Word(printables) integer = Optional(Literal('-')) + Word(nums) number = Word(nums + '.Ee-') colon = Suppress(Literal(':')) scolon = Suppress(Literal(';')) pipe = Suppress(Literal('|')) at = Suppress(Literal('@')) sign = Literal('+') | Literal('-') lp = Suppress(Literal('(')) rp = Suppress(Literal(')')) lb = Suppress(Literal('[')) rb = Suppress(Literal(']')) comma = Suppress(Literal(',')) version = Group(Keyword('VERSION') + QuotedString('"', multiline=True)) symbol = Word(alphas + '_') + Suppress(LineEnd()) symbols = Group(Keyword('NS_') + colon + Group(ZeroOrMore(symbol))) discard = Suppress(Keyword('BS_') + colon) ecu = Group( Keyword('BU_') + colon + ZeroOrMore(Word(printables).setWhitespaceChars(' \t'))) signal = Group( Keyword(SIGNAL) + word + colon + Group(integer + pipe + integer + at + integer + sign) + Group(lp + number + comma + number + rp) + Group(lb + number + pipe + number + rb) + QuotedString('"', multiline=True) + word) message = Group( Keyword(MESSAGE) + integer + word + integer + word + Group(ZeroOrMore(signal))) comment = Group( Keyword(COMMENT) + ((Keyword(MESSAGE) + integer + QuotedString('"', multiline=True) + scolon) | (Keyword(SIGNAL) + integer + word + QuotedString('"', multiline=True) + scolon))) choice = Group( Keyword(CHOICE) + integer + word + Group(OneOrMore(Group(integer + QuotedString('"', multiline=True)))) + scolon) entry = version | symbols | discard | ecu | message | comment | choice grammar = OneOrMore(entry) return grammar
# License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. from pyparsing import (CharsNotIn, Group, Forward, Literal, Suppress, Word, QuotedString, ZeroOrMore, alphas, alphanums) from string import Template import re # Grammar for CMake comment = Literal('#') + ZeroOrMore(CharsNotIn('\n')) quoted_argument = QuotedString('\"', '\\', multiline=True) unquoted_argument = CharsNotIn('\n ()#\"\\') argument = quoted_argument | unquoted_argument | Suppress(comment) arguments = Forward() arguments << (argument | (Literal('(') + ZeroOrMore(arguments) + Literal(')'))) identifier = Word(alphas, alphanums+'_') command = Group(identifier + Literal('(') + ZeroOrMore(arguments) + Literal(')')) file_elements = command | Suppress(comment) cmake = ZeroOrMore(file_elements) def extract_arguments(parsed): """Extract the command arguments skipping the parentheses""" return parsed[2:len(parsed) - 1] def match_block(command, parsed, start): """Find the end of block starting with the command""" depth = 0 end = start + 1 endcommand = 'end' + command while parsed[end][0] != endcommand or depth > 0:
from pyparsing import Word, OneOrMore, Group, ParseException ##### pyparsing based cigar parser def convertIntegers(tokens): return int(tokens[0]) alt_type_tokens = "SIMDX" digits = "0123456789" an_alt = Word(alt_type_tokens) alt_length = Word(digits).setParseAction(convertIntegers) alt_and_length = Group(alt_length + an_alt) cigar_string_parser = OneOrMore(alt_and_length) def parse_cigar_string(cigar_string): return cigar_string_parser.parseString(cigar_string) # Exemple usage: # cigar_string.parseString("76M1I257M1I22M1I99M") # % timeit cigar_string.parseString("76M1I257M1I22M1I99M")[1]
def parser(cls): color = Group(Keyword('color') + integer) base_contents = Each([color] + obstacle_items) base = Dict(Keyword('base').suppress() + base_contents + end) base.setParseAction(lambda toks: cls(**dict(toks))) return base
# formula_parser.py # -*- coding: utf-8 -*- # Christian Hill # 12/7/2011 from pyparsing import Word, Group, Optional, OneOrMore, ParseException,\ Literal, StringEnd import elements caps = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' lowers = caps.lower() digits = '0123456789' element = Word(caps, lowers) integer = Word(digits) elementRef = Group(element + Optional(integer, default='1')) chemicalFormula = OneOrMore(elementRef) plusminus = Literal('+') | Literal('-') charge = Group(plusminus + Optional(integer, default='1')) chargedChemicalFormula = Group(chemicalFormula) + Optional(charge)\ + StringEnd() class FormulaError(Exception): def __init__(self, error_str): self.error_str = error_str def __str__(self): return self.error_str def get_stoichiometric_formula(formula):
integer = Word(nums).setParseAction(numeric) floatnum = Combine( Optional('-') + ('0' | Word('123456789', nums)) + Optional('.' + Word(nums)) + Optional(Word('eE', exact=1) + Word(nums + '+-', nums))) floatnum.setParseAction(numeric) end = Keyword('end').suppress() point2d = floatnum + floatnum # Note: Since we're just doing 2D, we ignore the z term of 3D points. point3d = floatnum + floatnum + floatnum.suppress() # Obstacle position = Group((Keyword('pos') | Keyword('position')) + point3d) size = Group(Keyword('size') + point3d) rotation = Group((Keyword('rot') | Keyword('rotation')) + floatnum) obstacle_items = [position, Optional(size), Optional(rotation)] class Box(object): """A basic obstacle type.""" def __init__(self, pos=None, position=None, rot=None, rotation=None, size=None): self.pos = pos or position self.rot = rot or rotation
| min | 7 | 43 | 7 | 15 | 82 | 98 | 1 | 37 | | max | 11 | 52 | 10 | 17 | 85 | 112 | 4 | 39 | | ave | 9 | 47 | 8 | 16 | 84 | 106 | 3 | 38 | | sdev | 1 | 3 | 1 | 1 | 1 | 3 | 1 | 1 | +-------+------+------+------+------+------+------+------+------+ """ # define grammar for datatable heading = ( Literal( "+-------+------+------+------+------+------+------+------+------+") + "| | A1 | B1 | C1 | D1 | A2 | B2 | C2 | D2 |" + "+=======+======+======+======+======+======+======+======+======+" ).suppress() vert = Literal("|").suppress() number = Word(nums) rowData = Group(vert + Word(alphas) + vert + delimitedList(number, "|") + vert) trailing = Literal( "+-------+------+------+------+------+------+------+------+------+" ).suppress() datatable = heading + Dict(ZeroOrMore(rowData)) + trailing # now parse data and print results data = datatable.parseString(testData) print(data) pprint.pprint(data.asList()) print("data keys=", list(data.keys())) print("data['min']=", data['min']) print("data.max", data.max)
# definindo SQL tokens selectStmt = Forward( ) # token de espaço reservado usado para definir padrões de token recursivos SELECT, FROM, WHERE, AND, OR, IN, IS, NOT, NULL = map( CaselessKeyword, "select from where and or in is not null".split()) NOT_NULL = NOT + NULL ident = Word(alphas, alphanums + "_$").setName("identificador") columnName = delimitedList(ident, ".", combine=True).setName( "nome coluna" ) # delimitedList(expr, delim=',') - convenience function for matching one or more occurrences of expr, separated by delim columnName.addParseAction( ppc.upcaseTokens ) # upcaseTokens - converts all matched tokens to uppercase # addParseAction - faz a ação de transforma tudo em maiúsculo columnNameList = Group(delimitedList(columnName)) tableName = delimitedList(ident, ".", combine=True).setName("nome tabela") tableName.addParseAction(ppc.upcaseTokens) tableNameList = Group(delimitedList(tableName)) binop = oneOf( "= != < > >= <= eq ne lt le gt ge", caseless=True ) # oneOf(string, caseless=False) - convenience function for quickly declaring an alternative set of Literal tokens, by splitting the given string on whitespace boundaries. realNum = ppc.real() intNum = ppc.signed_integer() columnRval = realNum | intNum | quotedString | columnName # necessário para adicionar nas expressões algébricas whereCondition = Group((columnName + binop + columnRval) | (columnName + IN + Group("(" + delimitedList(columnRval) + ")")) | (columnName + IN + Group("(" + selectStmt + ")"))
+=======+======+======+======+======+======+======+======+======+ | min | 7 | 43 | 7 | 15 | 82 | 98 | 1 | 37 | | max | 11 | 52 | 10 | 17 | 85 | 112 | 4 | 39 | | ave | 9 | 47 | 8 | 16 | 84 | 106 | 3 | 38 | | sdev | 1 | 3 | 1 | 1 | 1 | 3 | 1 | 1 | +-------+------+------+------+------+------+------+------+------+ """ # define grammar for datatable underline = Word("-=") number = pyparsing_common.integer vert = Literal("|").suppress() rowDelim = ("+" + ZeroOrMore(underline + "+")).suppress() columnHeader = Group(vert + vert + delimitedList(Word(alphas + nums), "|") + vert) heading = rowDelim + columnHeader("columns") + rowDelim rowData = Group(vert + Word(alphas) + vert + delimitedList(number, "|") + vert) trailing = rowDelim datatable = heading + Dict(ZeroOrMore(rowData)) + trailing # now parse data and print results data = datatable.parseString(testData) print(data.dump()) print("data keys=", list(data.keys())) print("data['min']=", data['min']) print("sum(data['min']) =", sum(data['min'])) print("data.max =", data.max) print("sum(data.max) =", sum(data.max))
def parse_file(self):
    """Parses an existing namelist file and creates a deck of cards to
    hold the data. After this is executed, you need to call the
    ``load_model()`` method to extract the variables from this data
    structure.

    Note: this module uses the Python 2 ``print`` statement.
    """

    infile = open(self.filename, 'r')
    data = infile.readlines()
    infile.close()

    # Lots of numerical tokens for recognizing various kinds of numbers
    digits = Word(nums)
    dot = "."
    sign = oneOf("+ -")
    # Fortran namelists allow 'D' as well as 'E' for the exponent.
    ee = CaselessLiteral('E') | CaselessLiteral('D')

    num_int = ToInteger(Combine(Optional(sign) + digits))

    num_float = ToFloat(
        Combine(
            Optional(sign) +
            ((digits + dot + Optional(digits)) | (dot + digits)) +
            Optional(ee + Optional(sign) + digits)))

    # special case for a float written like "3e5"
    mixed_exp = ToFloat(Combine(digits + ee + Optional(sign) + digits))

    # I don't suppose we need these, but just in case (plus it's easy)
    nan = ToFloat(oneOf("NaN Inf -Inf"))

    # Order matters: floats before ints so "1.5" is not read as 1.
    numval = num_float | mixed_exp | num_int | nan
    strval = QuotedString(quoteChar='"') | QuotedString(quoteChar="'")
    b_list = "T TRUE True true F FALSE False false .TRUE. .FALSE. .T. .F."
    boolval = ToBool(oneOf(b_list))
    fieldval = Word(alphanums)

    # Tokens for parsing a line of data
    # Either a comma-separated list of numbers or a quoted string.
    numstr_token = numval + ZeroOrMore(Suppress(',') + numval) \
        | strval
    data_token = numstr_token | boolval
    index_token = Suppress('(') + num_int + Suppress(')')

    # One "card": name [(index)] = value [* dimension]
    card_token = Group(fieldval("name") +
                       Optional(index_token("index")) +
                       Suppress('=') +
                       data_token("value") +
                       Optional(Suppress('*') + num_int("dimension")))
    multi_card_token = (card_token + ZeroOrMore(Suppress(',') + card_token))
    # A bare list of values continuing the previous card's array.
    array_continuation_token = numstr_token.setResultsName("value")
    # 2D array assignment: name(row, col) = v1, v2, ...
    array2D_token = fieldval("name") + Suppress("(") + \
        Suppress(num_int) + Suppress(',') + \
        num_int("index") + Suppress(')') + \
        Suppress('=') + numval + \
        ZeroOrMore(Suppress(',') + numval)

    # Tokens for parsing the group head and tail
    group_end_token = Literal("/") | Literal("$END") | Literal("$end")
    group_name_token = (Literal("$") | Literal("&")) + \
        Word(alphanums).setResultsName("name") + \
        Optional(multi_card_token) + \
        Optional(group_end_token)

    # Comment Token
    comment_token = Literal("!")

    # Loop through each line and parse.
    current_group = None
    for line in data:
        # Keep the raw line: free-form groups preserve column spacing.
        line_base = line
        line = line.strip()

        # blank line: do nothing
        if not line:
            continue

        if current_group:

            # Skip comment cards
            if comment_token.searchString(line):
                pass

            # Process orindary cards
            elif multi_card_token.searchString(line):
                cards = multi_card_token.parseString(line)
                for card in cards:
                    name, value = _process_card_info(card)
                    self.cards[-1].append(Card(name, value))

            # Catch 2D arrays like -> X(1,1) = 3,4,5
            elif array2D_token.searchString(line):
                card = array2D_token.parseString(line)
                name = card[0]
                index = card[1]
                value = array(card[2:])
                # Rows with index > 1 are stacked onto the previous
                # card's value; row 1 starts a new card.
                if index > 1:
                    old_value = self.cards[-1][-1].value
                    new_value = vstack((old_value, value))
                    self.cards[-1][-1].value = new_value
                else:
                    self.cards[-1].append(Card(name, value))

            # Arrays can be continued on subsequent lines
            # The value of the most recent card must be turned into an
            # array and appended
            elif array_continuation_token.searchString(line):
                card = array_continuation_token.parseString(line)
                if len(card) > 1:
                    element = array(card[0:])
                else:
                    element = card.value
                if isinstance(self.cards[-1][-1].value, ndarray):
                    new_value = append(self.cards[-1][-1].value, element)
                else:
                    new_value = array([self.cards[-1][-1].value, element])
                self.cards[-1][-1].value = new_value

            # Lastly, look for the group footer
            elif group_end_token.searchString(line):
                current_group = None

            # Everything else must be a pure comment
            else:
                print "Comment ignored: %s" % line.rstrip('\n')

            # Group ending '/' can also conclude a data line.
            if line[-1] == '/':
                current_group = None

            #print self.cards[-1][-1].name, self.cards[-1][-1].value

        else:
            group_name = group_name_token.searchString(line)

            # Group Header
            if group_name:
                group_name = group_name_token.parseString(line)
                current_group = group_name.name
                self.add_group(current_group)

                # Sometimes, variable definitions are included on the
                # same line as the namelist header
                if len(group_name) > 2:
                    cards = group_name[2:]
                    for card in cards:
                        # Sometimes an end card is on the same line.
                        if group_end_token.searchString(card):
                            current_group = None
                        else:
                            name, value = _process_card_info(card)
                            self.cards[-1].append(Card(name, value))

            # If there is an ungrouped card at the start, take it as the
            # title for the analysis
            elif len(self.cards) == 0 and self.title == '':
                self.title = line

            # All other ungrouped cards are saved as free-form (card-less)
            # groups.
            # Note that we can't lstrip because column spacing might be
            # important.
            else:
                self.add_group(line_base.rstrip())