def grammar(g: Optional[Grammar] = None, isVerbose=False) -> Grammar:
    """Builds the `expr` grammar, or returns the cached one.

    When `g` is falsy, the module-level `GRAMMAR` is returned as-is if it is
    already set (in that case `isVerbose` is not applied); otherwise a new
    `Grammar("expr")` is created. When `g` is given, the expression rules are
    registered on it. In both cases the resulting grammar becomes the cached
    `GRAMMAR` if none was cached yet.

    The axiom is `ExprValue` and `WS` is the skipped symbol."""
    global GRAMMAR
    if not g:
        if GRAMMAR:
            return GRAMMAR
        g = Grammar("expr", isVerbose=isVerbose)
    sym = symbols(g)

    # `ExprValuePrefix` and `ExprValue` are declared empty first, so that the
    # rules below can reference them; their content is set further down.
    g.group("ExprValuePrefix")
    g.group("ExprComment", sym.EXPR_COMMENT)
    g.rule("ExprValue")
    g.rule("ExprTemplate", sym.LB, sym.ExprValue._as("value"), sym.RB)
    g.rule("ExprList", sym.LP, sym.ExprValue.optional()._as("arg"), sym.RP)

    # NOTE: Here we want to avoid using `ExprValue` as otherwise we'll end up
    # with really deeply nested matches. Each of these rules is a leading
    # token followed by an `ExprValuePrefix` bound as `arg`.
    prefixed_rules = (
        ("ExprQuote", "QUOTE"),
        ("ExprDecompose", "DOT"),
        ("ExprPipe", "PIPE"),
        ("ExprJoin", "WS"),
        ("ExprRest", "REST"),
    )
    for rule_name, token_name in prefixed_rules:
        g.rule(rule_name, getattr(sym, token_name),
               sym.ExprValuePrefix._as("arg"))

    # NOTE: The position of the entries matters. The query grammar inserts
    # its `Query` group at index 10 of this group, which is right before
    # `EXPR_VARIABLE` given the ordering below.
    sym.ExprValuePrefix.set(
        sym.ExprList,        # 0
        sym.ExprQuote,       # 1
        sym.ExprTemplate,    # 2
        sym.ExprComment,     # 3
        sym.NUMBER,          # 4
        sym.STRING_DQ,       # 5
        sym.EXPR_SINGLETON,  # 6
        sym.EXPR_KEY,        # 7
        sym.EXPR_NAME,       # 8
        sym.EXPR_SYMBOL,     # 9
        sym.EXPR_VARIABLE,   # 10
        sym.EXPR_TYPE,       # 11
    )

    suffix_group = g.group("ExprValueSuffix")
    suffix_group.set(
        sym.ExprPipe,
        sym.ExprDecompose,
        sym.ExprRest,
        sym.ExprJoin,
        sym.ExprComment,
    )

    # A value is a prefix, any number of suffixes, then optional whitespace.
    sym.ExprValue.set(
        sym.ExprValuePrefix._as("prefix"),
        sym.ExprValueSuffix.zeroOrMore()._as("suffixes"),
        sym.WS.zeroOrMore(),
    )

    g.axiom = sym.ExprValue
    g.skip = sym.WS
    if not GRAMMAR:
        GRAMMAR = g
    g.setVerbose(isVerbose)
    return g
def grammar(g: Optional[Grammar] = None, isVerbose=False) -> Grammar:
    """Builds the `format` grammar, or returns the cached one.

    When `g` is falsy, the module-level `GRAMMAR` is returned as-is if it is
    already set (`isVerbose` is then not applied); otherwise a new
    `Grammar("format")` is created. The grammar is first passed through
    `tree_grammar`, as `FormatPattern` references the `Tree` symbol, and the
    format rules are then registered on top of it.

    The axiom is `Formats` and `WS` is the skipped symbol. The resulting
    grammar becomes the cached `GRAMMAR` if none was cached yet."""
    global GRAMMAR
    if not g:
        if GRAMMAR:
            return GRAMMAR
        g = Grammar("format", isVerbose=isVerbose)
    g = tree_grammar(g)
    sym = symbols(g)

    # -- Expressions ---------------------------------------------------------
    # `FormatValue` and `FormatExpression` are declared empty here and filled
    # in once the symbols they refer to exist.
    g.group("FormatValue")
    g.rule("FormatExpression")
    g.rule(
        "FormatReference",
        sym.FORMAT_NAME,
        sym.FORMAT_BINDING.optional(),
        sym.CARDINALITY.optional(),
    )
    g.rule(
        "FormatGroup",
        sym.LP,
        sym.FormatExpression,
        sym.RP,
        sym.CARDINALITY.optional(),
    )
    g.group("FormatTokenString", sym.STRING_SQ, sym.STRING_DQ)
    g.rule("FormatTokenRange", sym.TOKEN_RANGE)
    g.group("FormatTokenValue", sym.FormatTokenString, sym.FormatTokenRange)
    g.rule("FormatToken", sym.FormatTokenValue, sym.CARDINALITY.optional())
    g.rule("FormatExpressionOr", sym.PIPE, sym.FormatValue)
    g.rule("FormatExpressionAnd", sym.UNDERSCORE.optional(), sym.FormatValue)
    g.group(
        "FormatExpressionContinuation",
        sym.FormatExpressionOr,
        sym.FormatExpressionAnd,
    )
    sym.FormatValue.set(sym.FormatToken, sym.FormatGroup, sym.FormatReference)
    sym.FormatExpression.set(
        sym.FormatValue,
        sym.FormatExpressionContinuation.zeroOrMore(),
    )

    # -- Statements ----------------------------------------------------------
    g.rule("FormatPattern", sym.EOL.optional(), sym.FORMAT_PAT, sym.Tree)
    g.rule(
        "FormatDefinition",
        sym.FORMAT_NAME,
        sym.FORMAT_VARIANT.optional(),
        sym.FORMAT_DEF,
        sym.FormatExpression,
        sym.FormatPattern.optional(),
        sym.FORMAT_END,
    )
    g.rule("FormatComment", sym.FORMAT_COMMENT, sym.EOL)

    # -- Formats -------------------------------------------------------------
    g.group(
        "FormatStatement",
        sym.FormatComment,
        sym.FormatDefinition,
        sym.EMPTY_LINE,
    )
    g.rule("Formats", sym.FormatStatement.zeroOrMore())

    g.axiom = sym.Formats
    g.skip = sym.WS
    if not GRAMMAR:
        GRAMMAR = g
    g.setVerbose(isVerbose)
    return g
def grammar(g: Optional[Grammar] = None, isVerbose=False) -> Grammar:
    """Builds the `tree` grammar, or returns the cached one.

    When `g` is falsy, the module-level `GRAMMAR` is returned as-is if it is
    already set (`isVerbose` is then not applied); otherwise a new
    `Grammar("tree")` is created. When `g` is given, the tree rules are
    registered on it.

    The axiom is `Forest` (one or more `Tree`) and `WS` is the skipped
    symbol. The resulting grammar becomes the cached `GRAMMAR` if none was
    cached yet."""
    global GRAMMAR
    if not g:
        if GRAMMAR:
            return GRAMMAR
        g = Grammar("tree", isVerbose=isVerbose)
    sym = symbols(g)

    # -- Node attributes -----------------------------------------------------
    # Each attribute value kind is a single-token group.
    attribute_groups = (
        ("NodeAttributeString", "STRING_DQ"),
        ("NodeAttributeNumber", "NUMBER"),
        ("NodeAttributeSymbol", "NODE_ATTRIBUTE_VALUE"),
    )
    for group_name, token_name in attribute_groups:
        g.group(group_name, getattr(sym, token_name))
    g.group(
        "NodeAttributeValue",
        sym.NodeAttributeString,
        sym.NodeAttributeNumber,
        sym.NodeAttributeSymbol,
    )
    g.rule(
        "NodeAttribute",
        sym.LP,
        sym.NODE_NAME._as("key"),
        sym.WS,
        sym.NodeAttributeValue._as("value"),
        sym.RP,
    )
    g.rule(
        "NodeAttributes",
        sym.LP,
        sym.AT,
        sym.WS,
        sym.NodeAttribute.oneOrMore()._as("attributes"),
        sym.RP,
    )

    # -- Nodes ---------------------------------------------------------------
    # `NodeChild` is declared empty so that `Node` can refer to it; it is
    # populated once all the node kinds are registered.
    g.group("NodeChild")
    literal_rules = (
        ("NodeString", "STRING_DQ"),
        ("NodeNumber", "NUMBER"),
        ("NodeSymbol", "NODE_SYMBOL_Q"),
        ("NodeTemplate", "NODE_TEMPLATE"),
    )
    for rule_name, token_name in literal_rules:
        g.rule(rule_name, getattr(sym, token_name))
    g.group("NodeComment", sym.NODE_COMMENT)
    g.rule("Leaf", sym.NODE_NAME._as("name"))
    g.rule(
        "Node",
        sym.LP,
        sym.NODE_NAME._as("name"),
        sym.NodeAttributes.optional()._as("attributes"),
        sym.NodeChild.zeroOrMore()._as("children"),
        sym.RP,
    )

    # NOTE: Changing the order might speed up things
    sym.NodeChild.set(
        sym.NodeSymbol,
        sym.NodeNumber,
        sym.NodeString,
        sym.NodeTemplate,
        sym.Leaf,
        sym.Node,
    )

    # A tree is a single node, optionally surrounded by comments.
    g.rule(
        "Tree",
        sym.NodeComment.zeroOrMore(),
        sym.NodeChild._as("node"),
        sym.NodeComment.zeroOrMore(),
    )
    g.rule("Forest", sym.Tree.oneOrMore())

    g.axiom = sym.Forest
    g.skip = sym.WS
    if not GRAMMAR:
        GRAMMAR = g
    g.setVerbose(isVerbose)
    return g
def symbols(g: Grammar) -> Symbols:
    """Registers tokens and words that are shared by all the grammars defined
    in this module.

    A symbol is only registered when `g.symbols` does not already have an
    attribute of that name, so symbols defined beforehand are left untouched
    and calling this function again on the same grammar does not re-register
    them.

    Returns `g.symbols`."""
    s = g.symbols
    # NOTE: The patterns are raw strings so that the backslashes reach the
    # regular expression engine as-is: sequences such as `\s` or `\.` are
    # invalid escapes in a regular string literal and trigger a warning on
    # recent Python versions. As a consequence, `\n` is now interpreted by
    # the regular expression engine (as a newline) instead of by Python.
    tokens = {
        "WS": r"[\s\n]+",
        "NUMBER": r"[0-9]+(\.[0-9]+)?",
        "STRING_DQ": r'"[^"]*"',
        # This used to be `s*\n` (missing backslash), which matched a run of
        # the letter `s` followed by a newline instead of a blank line.
        "EMPTY_LINE": r"\s*\n",
        "NODE_NAME": r"[a-z][\-a-z0-9]*",
        "NODE_COMMENT": r";;[^\n]*[\n]?",
        "NODE_SYMBOL_Q": r"'([a-zA-Z][\-_A-Za-z0-9]*)",
        "NODE_ATTRIBUTE_VALUE": r"[^\s\)\(\[\]\{\}]+",
        "NODE_TEMPLATE": r"(…|\.\.\.)?([A-Z][_A-Z]*)",
    }
    words = {
        "LP": "(",
        "RP": ")",
        "AT": "@",
    }
    # No shared (empty) group is declared at the moment; the loop below is
    # kept so that a name can simply be added here.
    groups = ()
    for name, pattern in tokens.items():
        if not hasattr(s, name):
            g.token(name, pattern)
    for name, text in words.items():
        if not hasattr(s, name):
            g.word(name, text)
    for name in groups:
        if not hasattr(s, name):
            g.group(name)
    return g.symbols
def grammar(g: Optional[Grammar] = None, isVerbose=True) -> Grammar:
    """Builds the `expr-indent` grammar, or returns the cached one.

    When `g` is falsy, the module-level `GRAMMAR` is returned as-is if it is
    already set (`isVerbose` is then not applied); otherwise a new
    `Grammar("expr-indent")` is created. Note that `isVerbose` defaults to
    `True` here.

    The rules reference expression symbols (`ExprValuePrefix`, `ExprPipe`,
    `ExprRest`, `ExprComment`) that are not registered in this function:
    they are expected to be available through `symbols(g)` or to already be
    defined on the given `g`.

    The axiom is `ExprBlock` and `SPACES` is the skipped symbol. The
    resulting grammar becomes the cached `GRAMMAR` if none was cached yet."""
    global GRAMMAR
    if not g:
        if GRAMMAR:
            return GRAMMAR
        g = Grammar("expr-indent", isVerbose=isVerbose)
    sym = symbols(g)

    # -- Indentation ---------------------------------------------------------
    g.procedure("ExprIndent", ParserUtils.Indent)
    g.procedure("ExprDedent", ParserUtils.Dedent)
    g.group("ExprIndentedValue")
    # NOTE(review): `ExprIndent` is registered a second time here, as a rule,
    # after being registered as a procedure above. Which of the two
    # `sym.ExprIndent` resolves to afterwards depends on the `Grammar`
    # implementation -- to be confirmed.
    g.rule(
        "ExprIndent",
        sym.INDENT._as("indent"),
        g.acondition(ParserUtils.CheckIndent),
    )
    g.rule(
        "ExprLine",
        sym.ExprIndent._as("indent"),
        sym.ExprIndentedValue.oneOrMore()._as("value"),
    )

    # -- Blocks --------------------------------------------------------------
    # `ExprBlock` is declared empty first as it is recursive through
    # `ExprBlockChild`.
    g.rule("ExprBlock")
    g.rule(
        "ExprBlockChild",
        sym.ExprIndent,
        sym.ExprBlock._as("child"),
        sym.ExprDedent,
    )
    sym.ExprBlock.set(
        sym.ExprLine.oneOrMore(),
        sym.ExprBlockChild.zeroOrMore(),
    )

    # # FIXME: Seting this line causes a lot of problems
    # s.ExprJoin.set(s.WS, s.ExprValuePrefix._as("arg"))

    # -- Indented values -----------------------------------------------------
    g.rule(
        "ExprIndentedList",
        sym.LP,
        sym.ExprIndentedValue._as("arg"),
        sym.RP,
    )
    g.rule(
        "ExprIndentedTemplate",
        sym.EXPR_TEMPLATE,
        sym.ExprIndentedValue._as("value"),
        sym.RB,
    )
    g.rule("ExprIndentedJoin", sym.SPACES, sym.ExprValuePrefix._as("arg"))

    # FIXME: This does not work better either
    # s.ExprValuePrefix.replace(0,s.ExprIndentedList)
    # s.ExprValuePrefix.replace(2,s.ExprIndentedTemplate)

    indented_suffix = g.group("ExprIndentedValueSuffix")
    indented_suffix.set(
        sym.ExprPipe,
        sym.ExprRest,
        sym.ExprIndentedJoin,
        sym.ExprComment,
    )
    sym.ExprIndentedValue.set(
        sym.ExprValuePrefix._as("prefix"),
        sym.ExprIndentedValueSuffix.zeroOrMore()._as("suffixes"),
    )

    g.axiom = sym.ExprBlock
    g.skip = sym.SPACES
    if not GRAMMAR:
        GRAMMAR = g
    g.setVerbose(isVerbose)
    return g
from tlang.query.parser import grammar, QueryProcessor
from tlang.utils import ParserUtils
from libparsing import Grammar

Processor = QueryProcessor

# Module-level grammar and processor: the query grammar is registered on a
# dedicated "tlang" grammar, whose axiom is then switched to `ExprValue`.
G = grammar(Grammar("tlang"), suffixed=False)
G.axiom = G.symbols.ExprValue
P = Processor(G)


def parseString(text: str, isVerbose=False, process=True):
    """Parses the given `text` by delegating to `ParserUtils.ParseString`.

    The `grammar` factory (not the module-level `G`) is what gets passed to
    the parsing utility. When `process` is true, `QueryProcessor.Get()` is
    given as the processor, otherwise no processor is used.

    Returns whatever `ParserUtils.ParseString` returns."""
    return ParserUtils.ParseString(
        grammar, text, isVerbose,
        processor=QueryProcessor.Get() if process else None)


def parseFile(path: str, isVerbose=False, process=True):
    """Parses the file at `path` by delegating to `ParserUtils.ParseFile`.

    The `grammar` factory (not the module-level `G`) is what gets passed to
    the parsing utility. When `process` is true, `QueryProcessor.Get()` is
    given as the processor, otherwise no processor is used.

    Returns whatever `ParserUtils.ParseFile` returns."""
    return ParserUtils.ParseFile(
        grammar, path, isVerbose,
        processor=QueryProcessor.Get() if process else None)


if __name__ == "__main__":
    import os
    import sys

    if len(sys.argv) < 2:
        # Fail with a readable message instead of an `IndexError`.
        sys.exit("Usage: %s PATH_OR_TEXT" % (sys.argv[0]))
    path = sys.argv[1]
    # The argument is either the path of a file to parse, or the text to
    # parse itself when no such file exists.
    if os.path.exists(path):
        # The context manager makes sure the file is closed once read.
        with open(path) as f:
            text = f.read()
    else:
        text = path
    print(parseString(text))
def grammar(g: Optional[Grammar] = None, isVerbose=False, suffixed=False) -> Grammar:
    """Builds the `query` grammar, or returns the cached one.

    When `g` is falsy, the module-level `GRAMMAR` is returned as-is if it is
    already set (`isVerbose` is then not applied); otherwise a new
    `Grammar("query")` is created. The grammar is first passed through
    `expr_grammar`, the query rules are registered on top of it, and the
    `Query` group is inserted into the expression's `ExprValuePrefix`.

    `suffixed` was meant to require Query matches to have at least one
    suffix, which is necessary when embedded in the expression language. It
    is currently accepted but has no effect, as the code using it is
    disabled (see the FIXME below).

    The axiom is `Query` and `WS` is the skipped symbol. The resulting
    grammar becomes the cached `GRAMMAR` if none was cached yet."""
    global GRAMMAR
    if not g:
        if GRAMMAR:
            return GRAMMAR
        g = Grammar("query", isVerbose=isVerbose)
    g = expr_grammar(g)
    sym = symbols(g)

    # TODO: Template
    g.group(
        "QueryAxis",
        sym.QUERY_AXIS_DESCENDANTS,
        sym.QUERY_AXIS_ANCESTORS,
        sym.QUERY_AXIS_BEFORE,
        sym.QUERY_AXIS_AFTER,
        sym.QUERY_AXIS_SELF,
    )
    # `Query` is declared empty so that bindings can refer to it; it is set
    # once the prefix/suffix rules are registered.
    g.group("Query")

    # -- Selectors -----------------------------------------------------------
    g.rule("QueryNode", sym.QUERY_NODE._as("name"))
    single_token_rules = (
        ("QueryVariable", "QUERY_VARIABLE"),
        ("QueryAttribute", "QUERY_ATTRIBUTE"),
        ("QueryCurrentNode", "QUERY_CURRENT_NODE"),
        ("QuerySubset", "QUERY_SUBSET"),
    )
    for rule_name, token_name in single_token_rules:
        g.rule(rule_name, getattr(sym, token_name))
    g.group(
        "QuerySelectorValue",
        sym.QuerySubset,
        sym.QueryCurrentNode,
        sym.QueryVariable,
        sym.QueryNode,
        sym.QueryAttribute,
    )
    # TODO: Support optional name
    g.rule(
        "QuerySelectorBinding",
        sym.LB,
        g.arule(sym.QUERY_VARIABLE, sym.COLON).optional()._as("name"),
        sym.Query._as("value"),
        sym.RB,
    )
    g.group("QuerySelector", sym.QuerySelectorBinding, sym.QuerySelectorValue)
    g.rule("QueryPredicate", sym.LS, sym.ExprValue._as("expr"), sym.RS)

    # -- Prefix and suffix ---------------------------------------------------
    # Both share the same shape; the axis is optional in a prefix only.
    g.rule(
        "QueryPrefix",
        sym.QueryAxis.optional()._as("axis"),
        sym.QuerySelector._as("selector"),
        sym.QueryPredicate.optional()._as("predicate"),
    )
    g.rule(
        "QuerySuffix",
        sym.QueryAxis._as("axis"),
        sym.QuerySelector._as("selector"),
        sym.QueryPredicate.optional()._as("predicate"),
    )

    # FIXME: So that whole "suffixed" thing is to avoid grammar conflicts
    # with the expr parser. But it doesn't work that well in the current
    # state, so we should remove it and rework it.
    #g.rule("QuerySuffixed", s.QueryPrefix._as("prefix"), s.QuerySuffix.oneOrMore()._as("suffixes"))
    g.rule(
        "QuerySuffixedOptional",
        sym.QueryPrefix._as("prefix"),
        sym.QuerySuffix.zeroOrMore()._as("suffixes"),
    )
    g.rule(
        "QueryAttributePrefix",
        sym.QueryAttribute._as("prefix"),
        sym.QuerySuffix.zeroOrMore()._as("suffixes"),
    )
    # if suffixed:
    #     s.Query.set(s.QuerySuffixed, s.QueryAttributePrefix)
    # else:
    sym.Query.set(sym.QuerySuffixedOptional, sym.QueryAttributePrefix)

    # We insert the QuerySuffixed just before the EXPR_VARIABLE, as the query
    # also has a variable. We only want ExprValuePrefix to be queries with
    # a suffix, not just a query with a prefix as things like `(fail!) would
    # not parse as `fail` is a query prefix, and the `!` would then become
    # unparseable.
    sym.ExprValuePrefix.insert(10, sym.Query)

    g.axiom = sym.Query
    g.skip = sym.WS
    if not GRAMMAR:
        GRAMMAR = g
    g.setVerbose(isVerbose)
    return g
def grammar(isVerbose=False):
    """Defines a grammar for simple arithmetic expressions calculation.

    An `Expression` is a `Value` followed by any number of `Suffix`, where a
    `Suffix` is an `OPERATOR` (bound as `operator`) followed by a `Value`
    (bound as `value`), and a `Value` is either a `NUMBER` or a `VARIABLE`.
    `WS` is the skipped symbol.

    Returns a new `Grammar` on each call, with `isVerbose` passed to its
    constructor."""
    g = Grammar(isVerbose=isVerbose)
    s = g.symbols
    # NOTE: Raw strings are used so that the regular expression escapes
    # (`\s`, `\d`, …) are not parsed as (invalid) string escape sequences,
    # which triggers a warning on recent Python versions. The resulting
    # patterns are unchanged.
    g.token("WS", r"\s+")
    g.token("NUMBER", r"\d+(\.\d+)?")
    g.token("VARIABLE", r"\w+")
    g.token("OPERATOR", r"[\+\-\*/]")
    # `NUMBER` is listed before `VARIABLE`, as `\w+` also matches digits.
    g.group("Value", s.NUMBER, s.VARIABLE)
    g.rule("Suffix", s.OPERATOR._as("operator"), s.Value._as("value"))
    g.rule("Expression", s.Value, s.Suffix.zeroOrMore())
    g.axiom = s.Expression
    g.skip = s.WS
    return g