class BoolArg:
    """Boolean argument: optional negating prefix, a name, optional ``=value``.

    The ``prefix`` and ``right_hand_val`` attributes may be missing on an
    instance, so every access is guarded with ``hasattr``.
    """

    grammar = (
        peg.optional(peg.attr("prefix", FALSE_PREFIXES)),
        peg.attr("name", peg.word),
        peg.optional("=", peg.attr("right_hand_val", TRUE_VALUES + FALSE_VALUES)),
    )

    @property
    def value(self):
        """Return False when negated by prefix or by right-hand value, else True."""
        if hasattr(self, "prefix") and self.prefix in FALSE_PREFIXES:
            return False
        negated_by_rhs = (
            hasattr(self, "right_hand_val")
            and self.right_hand_val in FALSE_VALUES
        )
        return not negated_by_rhs

    @value.setter
    def value(self, value):
        """Normalise the stored form: bare name for true, ``!`` prefix for false."""
        if value:
            # A true value is written without any prefix.
            if hasattr(self, "prefix"):
                del self.prefix
        else:
            # Negation is expressed through the prefix only.
            self.prefix = "!"
        # In both cases the equals sign and right-hand side are omitted.
        if hasattr(self, "right_hand_val"):
            del self.right_hand_val
class Selection(List):
    """Braced group of expressions with an optional number on either side."""

    grammar = pg.optional(Number), '{', pg.some(Expression), '}', pg.optional(Number)

    def as_tuple(self):
        """Return ``('selection', down, up, (exprs,))``.

        ``down`` is the leading child when it is an ``int`` (default ``0``);
        ``up`` is the trailing child when it is an ``int`` (default ``None``);
        ``exprs`` is the converted child that sits between the bounds.

        Raises:
            AssertionError: if the number of children does not match the
                bounds that were detected.
        """
        children = tuple(map(to_tuple, self))
        assert len(children) in {1, 2, 3}

        # Detect bounds by type rather than truthiness so that an explicit
        # bound of 0 is still recognised as a bound.
        has_down = isinstance(children[0], int)
        has_up = isinstance(children[-1], int)
        down = children[0] if has_down else 0
        up = children[-1] if has_up else None

        # One expression child plus one child per bound that is present.
        assert len(children) == 1 + has_down + has_up
        exprs = children[1] if has_down else children[0]
        return 'selection', down, up, (exprs,)
class ArgumentsBlock(List):
    """Optional parenthesised ``Arguments`` with an optional trailing comma."""

    grammar = optional('(', Arguments, optional(','), ')')

    @property
    def arguments(self):
        """Return the first parsed item, or an empty list when it is ``None``."""
        parsed = self[0]
        return [] if parsed is None else parsed
class Value(UnaryRule):
    """Generic rule for all kinds of phrases recognized.

    Serves as an encapsulation of the listed rules.
    """
    # The matched alternative is stored in the ``op`` attribute. The
    # alternatives are given as a list, so their order is significant.
    grammar = attr('op', [
        # Range, optionally preceded by an omitted "=".
        (optional(omit(Literal("="))), RangeOp),
        # Comparison operators.
        GreaterEqualOp,
        LessEqualOp,
        GreaterThanOp,
        LessThanOp,
        # Plain values, optionally preceded by an omitted "=".
        (optional(omit(Literal("="))), [
            ComplexValue,
            ParenthesizedSimpleValues,
            SimpleValueBooleanQuery,
            SimpleValue
        ])
    ])
class VariantFull(Scope, Node):
    """Full ``variant`` declaration: optional name, optional tag, braced entries."""

    grammar = (
        'variant',
        pypeg2.optional(Identifier),
        pypeg2.optional(VariantTag),
        '{', StructVariantEntries, '}'
    )

    def __init__(self, args):
        """Consume the optional name and tag from ``args``, then pass on the rest.

        ``args`` is modified in place: a leading ``Identifier`` and a
        following ``VariantTag`` are removed when present.
        """
        self._name = None
        self._tag = None

        if type(args[0]) is Identifier:
            self._name = args.pop(0)

        if type(args[0]) is VariantTag:
            self._tag = args.pop(0)

        super().__init__(args[0])

    @property
    def name(self):
        """Parsed name, or ``None`` when the variant is anonymous."""
        return self._name

    @property
    def tag(self):
        """Parsed tag, or ``None`` when no tag was given."""
        return self._tag

    @tag.setter
    def tag(self, tag):
        self._tag = tag

    def __str__(self):
        """Render name, tag and entries inside ``<variant-full>`` markers."""
        pieces = ['<variant-full>']

        if self._name is not None:
            pieces.append(str(self._name))

        if self._tag is not None:
            pieces.append(str(self._tag))

        pieces.extend(str(entry) for entry in self.entries)
        pieces.append('</variant-full>')

        return ''.join(pieces)
class Arguments(List):
    """Optional comma-separated list of arguments.

    Each argument is one of the three listed argument rules (unquoted,
    single-quoted, double-quoted).
    """
    grammar = optional(
        csl([
            ArgumentWithoutQuotes,
            ArgumentWithSingleQuotes,
            ArgumentWithDoubleQuotes
        ], separator=','))
def runTest(self):
    """Check the tuples returned by ``some``, ``maybe_some`` and ``optional``."""
    cases = (
        (pypeg2.some("thing"), (-2, "thing")),
        (pypeg2.maybe_some("thing"), (-1, "thing")),
        (pypeg2.optional("hello", "world"), (0, ("hello", "world"))),
    )
    for actual, expected in cases:
        self.assertEqual(actual, expected)
def arg(name: str, variants: list = None):
    """Return ``(variants, grammar)`` for an argument with an optional value.

    ``name`` is appended to ``variants`` when it is not already in it; a
    list passed by the caller is modified in place and returned as-is.
    The grammar is an optional ``=`` followed by a ``value`` attribute that
    matches one or more characters other than ``,`` and ``)``.
    """
    names = [] if variants is None else variants
    if name not in names:
        names.append(name)
    value_grammar = peg.optional("=", peg.attr("value", re.compile(r"[^,)]+")))
    return names, value_grammar
class IncludedField(List):
    """Field name with an optional leading ``Alias``."""

    grammar = optional(Alias), name()

    @property
    def alias(self):
        """Return the ``name`` of the first item, or ``None`` when empty."""
        return self[0].name if len(self) > 0 else None
class RangeExpr(List):
    """One ``RangePart`` followed by any number of comma-separated parts."""

    grammar = RangePart, maybe_some(',', optional(Operator), RangePart)

    def _build(self, rr):
        """Delegate to each child's ``_build``; this node holds no data itself."""
        for child in self:
            child._build(rr)
class ContentDispositionValue: """A complete Content-Disposition value (RFC 6266, Section 4.1).""" # Allows nonconformant final semicolon # I've seen it in the wild, and browsers accept it # http://greenbytes.de/tech/tc2231/#attwithasciifilenamenqs grammar = (peg.attr('dtype', DispositionType), peg.attr('params', DispositionParmList), peg.optional(';'))
class NamedTerm(List):
    """Identifier optionally followed by parenthesised arguments."""

    grammar = Ident, pg.optional('(', MultArgs, ')')

    def as_tuple(self):
        """Return ``('term', ident, args)``; ``args`` is ``()`` when absent."""
        children = tuple(self)
        assert len(children) in {1, 2}
        converted = tuple(to_tuple(child) for child in children)
        if len(converted) == 1:
            # No argument list was parsed: pad with an empty tuple.
            converted += ((),)
        return ('term',) + converted
class Arguments(List):
    """Optional separated list of unquoted or quoted arguments."""
    # NOTE(review): the separator is a list of ',' and '', which presumably
    # lets arguments be separated by a comma or by nothing at all -- confirm
    # against pypeg2's ``csl`` behaviour.
    grammar = optional(csl(
        [
            ArgumentWithoutQuotes,
            ArgumentWithQuotes,
        ],
        separator=[',', '']
    ))
class Tag(Nullary):
    """Tag name with an optional quoted value.

    The name (one or more word characters) is stored as ``tag``. The
    optional value is stored as ``value`` and may be wrapped in double or
    single quotes; the value itself cannot contain the quote character
    that delimits it.
    """
    grammar = (attr('tag', re.compile(r'\w+', re.M)),
               optional([
                   # Double-quoted value.
                   ('"', attr('value', re.compile(r'[^"]*')), '"'),
                   # Single-quoted value.
                   (
                       "'",
                       attr('value', re.compile(r"[^']*")),
                       "'",
                   )]))
class EmptyQuery(LeafRule):
    """Rule whose grammar is optional whitespace that is omitted from the result."""

    grammar = omit(optional(whitespace))

    def __init__(self):
        """Create the node with ``value`` set to ``None``."""
        self.value = None

    def __repr__(self):
        """Return the class name followed by empty parentheses."""
        class_name = self.__class__.__name__
        return '%s()' % class_name
class SimpleValue(LeafRule):
    """Represents terminals as plaintext.

    E.g. title top cross section, or title Si-28(p(pol.), n(pol.)).
    """

    class Whitespace(LeafRule):
        """Whitespace between units, kept so the original text can be rebuilt."""
        grammar = attr('value', whitespace)

    grammar = contiguous(
        SimpleValueUnit,
        maybe_some((optional(Whitespace), some(SimpleValueUnit))))

    def __init__(self, values):
        """Join the ``value`` of every parsed terminal and strip the result."""
        super(SimpleValue, self).__init__()
        # ``unicode`` is kept from the original code (Python 2 text type).
        self.value = unicode.strip(''.join([v.value for v in values]))

    @classmethod
    def parse(cls, parser, text, pos):
        """Parse a simple value, giving back a trailing keyword query if one was consumed.

        Returns:
            ``(remaining_text, SimpleValue)`` on success, or ``(text, error)``
            with the caught ``SyntaxError`` when the grammar does not match.
        """
        def unconsume_and_reconstruct_input(terminals, complex_idx, unparsed_text):
            """Reconstruct input in case of consuming a keyword query with ComplexValue as SimpleValue.

            Un-consuming 3 elements and specifically a Keyword, Whitespace and ComplexValue and then reconstructing
            parser's input text.

            Example:
                Given this query "author foo t 'bar'", the terminals would be:
                    [SimpleValueUnit("foo"), Whitespace(" "), SimpleValueUnit("t"), Whitespace(" "),
                     SimpleValueUnit("'bar'")]
                thus the kept terminals are [SimpleValueUnit("foo"), Whitespace(" ")], while the text is
                reconstructed as "t 'bar' rest_of_the_text".

            The inputs are explicit parameters; the helper previously took no
            parameters but was called with one argument, which raised TypeError.
            """
            # NOTE(review): for complex_idx < 2 this index is negative and the
            # slices count from the end of the list -- confirm that case cannot occur.
            split_at = complex_idx - 2
            reconstructed_terminals = terminals[:split_at]
            remaining_text = ''.join([v.value for v in terminals[split_at:]]) + " " + unparsed_text
            return remaining_text, reconstructed_terminals

        try:
            t, r = parser.parse(text, cls.grammar)

            # Covering a case of implicit-and when one of the SimpleValue tokens is a ComplexValue.
            # E.g. with the query "author foo t 'bar'", since 'bar' is a ComplexValue, then the previous token is a
            # keyword. This means we have consumed a KeywordQuery (due to 'and' missing).
            found_complex_value = False
            for idx, v in enumerate(r):
                if ComplexValue.regex.match(v.value):
                    remaining_text, reconstructed_terminals = unconsume_and_reconstruct_input(r, idx, t)
                    found_complex_value = True
                    break

            if found_complex_value:
                result = remaining_text, SimpleValue(reconstructed_terminals)
            else:
                result = t, SimpleValue(r)

        except SyntaxError as e:
            return text, e

        return result
class StructFull(Scope):
    """Full ``struct`` declaration: optional name, braced entries, optional alignment."""

    grammar = (
        'struct',
        pypeg2.optional(Identifier),
        '{', StructVariantEntries, '}',
        pypeg2.optional(StructAlign)
    )

    def __init__(self, args):
        """Consume name, entries and alignment from ``args`` in that order.

        ``args`` is modified in place as each part is consumed.
        """
        self._name = None
        self._align = None

        if type(args[0]) is Identifier:
            self._name = args.pop(0)

        # The entries container is handed to the base class before it is dropped.
        super().__init__(args[0].elements)
        del args[0]

        # Anything left over is the alignment.
        if args:
            self._align = args[0]

    @property
    def name(self):
        """Parsed name, or ``None`` when the struct is anonymous."""
        return self._name

    @property
    def align(self):
        """Parsed alignment, or ``None`` when none was given."""
        return self._align

    def __str__(self):
        """Render name, entries and alignment inside ``<struct-full>`` markers."""
        pieces = ['<struct-full>']

        if self._name is not None:
            pieces.append(str(self._name))

        pieces.extend(str(entry) for entry in self.entries)

        if self._align is not None:
            pieces.append(str(self._align))

        pieces.append('</struct-full>')

        return ''.join(pieces)
def parse(cls, parser, text, pos):
    """Parse a boolean query whose operands are simple values.

    The left operand and the boolean operator are parsed first (a missing
    operator becomes an implicit AND). A look-ahead then tries the more
    specific rules on the right-hand side: if one of them matches, this rule
    gives up; if none matches, the right operand is parsed with
    ``cls.grammar[2]``.

    Returns:
        ``(remaining_text, SimpleValueBooleanQuery)`` on success, otherwise
        ``(text, SyntaxError)`` with the original, unconsumed text.
    """
    # NOTE(review): the nesting of the right-operand retry inside the
    # ``except`` branch is reconstructed from flattened source -- confirm
    # against the original layout.

    # Track how far parsing got, so the except branch can tell where it failed.
    left_operand, operator = None, None
    try:
        # Parse left operand
        text_after_left_op, left_operand = parser.parse(text, cls.grammar[0])

        # Parse boolean operators
        text_after_bool_op, operator = parser.parse(text_after_left_op, cls.grammar[1])
        if not operator:  # Implicit AND at terminals level
            operator = And(BooleanOperator.AND)

        # Parse right operand.
        # We don't want to eagerly recognize anything else other than a SimpleValue.
        # So we attempt to recognize the more specific rules, and if we do, then we need to stop identifying this
        # rule.
        parser.parse(
            text_after_bool_op,
            [
                (
                    omit(optional(Not)),
                    [
                        InvenioKeywordQuery,
                        SpiresKeywordQuery,
                    ]
                ),
                [
                    RangeOp,
                    GreaterEqualOp,
                    LessEqualOp,
                    GreaterThanOp,
                    LessThanOp,
                    ComplexValue
                ]
            ]
        )

        # Identified something other than a SimpleValue, stop parsing this rule.
        result = text, SyntaxError("expected simple value related rule as right operand of a " + cls.__name__)

    except SyntaxError as e:
        result = text, e

        # Both set means the failure came from the look-ahead above, i.e. the
        # right-hand side is not one of the more specific rules.
        if left_operand and operator:
            # Attempt to parse a right operand
            try:
                remaining_text, right_operand = parser.parse(text_after_bool_op, cls.grammar[2])
                result = remaining_text, SimpleValueBooleanQuery(
                    left_operand,
                    bool_op=operator,
                    right=right_operand
                )
            except SyntaxError as e:  # Actual failure of parsing boolean query at terminals level
                return text, e

    return result
class Block(List):
    """Arguments block followed by a braced body with an optional trailing comma."""

    grammar = ArgumentsBlock, '{', BlockBody, optional(','), '}'

    @property
    def arguments(self):
        """Return the ``arguments`` of the first item."""
        arguments_block = self[0]
        return arguments_block.arguments

    @property
    def body(self):
        """Return the second item."""
        block_body = self[1]
        return block_body
class Text(object):
    """Optional leading whitespace followed by a run of characters other than ``<`` and ``{``."""

    grammar = attr('whitespace', optional(whitespace)), attr('value', re.compile(r'[^<{]+'))

    def compose(self, parser, indent=0):
        """Return the single-quoted text, preceded by ``indent`` spaces."""
        fields = {
            'indent': indent * " ",
            'whitespace': self.whitespace or '',
            'value': self.value,
        }
        return "{indent}'{whitespace}{value}'".format(**fields)
class Enum(Node):
    """``enum`` declaration: optional name, integer type, braced enumerators."""

    grammar = (
        'enum',
        pypeg2.optional(EnumName),
        ':',
        [
            pypeg2.some(Identifier),
            Integer,
        ],
        '{', Enumerators, '}'
    )

    def __init__(self, args):
        """Split ``args`` into name, integer type and enumerators.

        A leading ``EnumName`` is removed from ``args`` in place. The integer
        type is either the ``Integer`` item itself or an ``Identifier`` built
        from the space-joined values of every item except the last.
        """
        self._name = None

        if type(args[0]) is EnumName:
            self._name = args[0].value
            del args[0]

        head = args[0]
        if type(head) is Integer:
            self._int_type = head
        else:
            words = [ident.value for ident in args[:-1]]
            self._int_type = Identifier(' '.join(words))

        self._enumerators = args[-1]

    @property
    def name(self):
        """Parsed name value, or ``None`` when the enum is anonymous."""
        return self._name

    @property
    def int_type(self):
        """Integer type of the enum."""
        return self._int_type

    @int_type.setter
    def int_type(self, int_type):
        self._int_type = int_type

    @property
    def enumerators(self):
        """Last parsed item: the enumerators."""
        return self._enumerators

    def __str__(self):
        """Render name, integer type and enumerators inside ``<enum>`` markers."""
        pieces = ['<enum>']

        if self._name is not None:
            pieces.append(str(self._name))

        pieces.append(str(self._int_type))
        pieces.append(str(self._enumerators))
        pieces.append('</enum>')

        return ''.join(pieces)
class Text(object):
    """Text between tags and/or inline code sections."""

    grammar = attr('whitespace', optional(whitespace)), attr('value', re.compile(r'[^<{]+'))

    def compose(self, parser, indent=0):
        """Return the single-quoted text, preceded by ``int(indent)`` spaces."""
        fields = {
            'indent': int(indent) * " ",
            'whitespace': self.whitespace or '',
            'value': self.value,
        }
        return "{indent}'{whitespace}{value}'".format(**fields)
class ComponentName(object):
    """A name or symbol that starts with an uppercase ASCII letter.

    Relying on an uppercase first letter has language implications; another
    marker for components (for example a leading '!') may be worth
    supporting at some point.
    """

    grammar = attr('first_letter', re.compile(r'[A-Z]')), attr('rest', optional(Symbol))

    def compose(self):
        """Return the first letter followed by the rest of the name, if any."""
        return self.first_letter + (self.rest or '')
class Query(List, Thing):
    """Sequence of expressions, optionally joined by boolean operators."""

    grammar = expressions, maybe_some(optional(BooleanOperator), expressions)

    def build(self, builder):
        """Build every child in order, adding 'AND' between adjacent non-operators."""
        previous = None
        for thing in self:
            adjacent_operands = previous and not (
                isinstance(previous, BooleanOperator)
                or isinstance(thing, BooleanOperator)
            )
            if adjacent_operands:
                builder.add('AND')
            thing.build(builder)
            previous = thing
class KeySymList(peg.List):
    """A list of key symbols in brackets."""

    grammar = "[", peg.optional(peg.csl(KeySym)), "]"

    @staticmethod
    def from_strings(strings: list):
        """Return a ``KeySymList`` holding one ``KeySym`` per string, in order."""
        def make_sym(text):
            sym = KeySym()
            sym.name = text
            return sym

        levels = KeySymList()
        for text in strings:
            levels.append(make_sym(text))
        return levels
class Query(ListRule):
    """The entry-point for the grammar.

    Find keyword is ignored as the current grammar is an augmentation of SPIRES and Invenio style syntaxes.
    It only serves for backward compatibility with SPIRES syntax.
    """
    grammar = [
        # An optional, omitted "find" keyword (or one of its abbreviations,
        # case-insensitive, followed by one whitespace character), then a
        # statement and any trailing malformed words.
        (omit(optional(re.compile(r"(find|fin|fi|f)\s", re.IGNORECASE))),
         (Statement, maybe_some(MalformedQueryWords))),
        # Fallbacks: malformed words only, then the empty query.
        MalformedQueryWords,
        EmptyQuery,
    ]
class String(_SingleValue, Node):
    """``string`` keyword with an optional braced value assignment."""

    grammar = ('string', pypeg2.optional(('{', ValueAssignment, ';', '}')))

    def __init__(self, encoding=None):
        """Pass ``encoding`` on to the base initialiser."""
        super().__init__(encoding)

    def __str__(self):
        """Render the value, if any, inside ``<string>`` markers."""
        inner = '' if self.value is None else str(self.value)
        return '<string>' + inner + '</string>'
class Enumerators(_List, Node):
    """Comma-separated enumerators with an optional trailing comma."""

    grammar = pypeg2.csl(Enumerator), pypeg2.optional(',')

    def __init__(self, items):
        """Initialise the base list with the ``value`` of every item."""
        values = [item.value for item in items]
        super().__init__(values)

    def __str__(self):
        """Render every element inside ``<enumerators>`` markers."""
        body = ''.join(str(element) for element in self)
        return '<enumerators>' + body + '</enumerators>'
class ConstNumber(_SingleValue, Node):
    """Constant integer with an optional leading ``+`` or ``-`` sign."""

    grammar = pypeg2.optional(re.compile(r'[+-]')), ConstInteger

    def __init__(self, args):
        """Apply the optional sign to the integer value.

        When ``args`` has two items, the first is the sign and is removed
        from ``args`` in place.
        """
        factor = 1

        if len(args) == 2:
            if args[0] == '-':
                factor = -1
            del args[0]

        super().__init__(args[0].value * factor)

    def __str__(self):
        """Render the value inside ``<const-number>`` markers."""
        return '<const-number>{}</const-number>'.format(self.value)
class AmountExpression(Expression):
    """Comparison of an amount: more/less, optional 'than', optional sign, number."""

    grammar = (
        attr('cmp', [symbols['more'], symbols['less']]),
        optional('than'),
        attr('sign', re.compile(r'[+-]?')),
        attr('value', number),
    )

    def build(self, builder):
        """Add a ``>=`` or ``<=`` condition on the amount to ``builder``.

        Without an explicit sign the comparison is made against
        ``ABS(amount)``; a ``-`` sign negates the compared value.
        """
        if self.cmp == 'more':
            operator = '>='
        else:
            operator = '<='

        if self.sign:
            column = 'amount'
        else:
            column = 'ABS(amount)'

        magnitude = float(self.value)
        signed_value = -magnitude if self.sign == '-' else magnitude

        builder.add('{} {} ?'.format(column, operator), signed_value)
def parse(parser, text, pos):
    """Parse a paired tag: opening tag, children, and the matching closing tag.

    The tag name, attributes and children are stored on a new ``PairedTag``.
    The closing tag must repeat the parsed name. On a ``SyntaxError`` the
    text at the point of failure is returned together with the error.
    """
    # NOTE(review): no value is returned on the success path in the visible
    # code -- the block looks truncated; confirm against the full file.
    result = PairedTag()
    try:
        # Opening tag: '<' name attributes '>'
        text, _ = parser.parse(text, '<')
        text, tag = parser.parse(text, Symbol)
        result.name = tag
        text, attributes = parser.parse(text, Attributes)
        result.attributes = attributes
        text, _ = parser.parse(text, '>')
        # Content between the opening and closing tags
        text, children = parser.parse(text, TagChildren)
        result.children = children
        # Closing tag: optional whitespace, then '</' name '>'
        text, _ = parser.parse(text, optional(whitespace))
        text, _ = parser.parse(text, '</')
        text, _ = parser.parse(text, result.name)
        text, _ = parser.parse(text, '>')
    except SyntaxError, e:
        return text, e
# NOTE(review): this assignment looks like the tail of a definition that
# starts before this point -- confirm where it belongs.
body = None


class List(MExpression):
    """Brace-delimited expression whose head is the symbol ``List``."""
    head = MSymbol("List")
    # '{', an optional comma-separated ``body`` of expressions, '}'
    grammar = (
        Literal("{"),
        optional(attr("body", csl(MExpression))),
        Literal("}")
    )


# Since MExpression is recursive, we need to define the class,
# then the grammar. Moreover, since it depends on List and other
# such things, we need to put it last.
MExpression.grammar = [
    # head[ body ] with an optional comma-separated body
    (
        attr("head", MSymbol),
        Literal("["),
        optional(attr("body", csl(MExpression))),
        Literal("]")
    ),
    # a bare symbol
    attr("head", MSymbol),
    List,
    atom
]


## TESTING ###########################################################

# Manual smoke test: parse a few sample inputs and print the results.
if __name__ == "__main__":
    print parse("ab`c", MExpression)
    print parse('12', MExpression)
    print parse('"a"', MExpression)
    print parse("List", MExpression)
    print parse("List[]", MExpression)
# NOTE(review): this initialiser belongs to a class whose header is not
# visible here -- confirm its enclosing class and indentation.
def __init__(self):
    pass


class ListRule(ast.ListOp):
    """List operation node with a no-argument, no-op initialiser."""
    def __init__(self):
        pass


class Whitespace(LeafRule):
    """One or more whitespace characters, stored as ``value``."""
    grammar = attr('value', re.compile(r"\s+"))


# Shorthand for optional whitespace.
_ = optional(Whitespace)


class Not(object):
    """Negation operator: "and not", "not" (both case-insensitive) or '-'."""
    grammar = omit([
        omit(re.compile(r"and\s+not", re.I)),
        re.compile(r"not", re.I),
        Literal('-'),
    ])


class And(object):
    """Conjunction operator: "and" (case-insensitive) or '+'."""
    grammar = omit([
        re.compile(r"and", re.I),
        Literal('+'),
    ])