class ForAll(List):
    """A quantified term: a named head, then one or more condition terms."""

    grammar = NamedTerm, ':', Term, pg.maybe_some(',', Term)

    def as_tuple(self):
        """Return ('forall', <predicate>, <args...>, (<condition tuples>,))."""
        quantified, *body = self
        # Drop the leading tag of the head tuple: keep only predicate and args.
        predicate_and_args = quantified.as_tuple()[1:]
        condition_tuples = tuple(term.as_tuple() for term in body)
        return ('forall', *predicate_and_args, condition_tuples)
class ExpressionList(List):
    """A sequence of expression alternatives, rendered joined by 'and'."""

    grammar = maybe_some([
        EqualsExpression,
        BetweenExpression,
        OldBetweenExpression,
        InExpression,
    ])

    def __str__(self):
        """Render every contained expression separated by ' and '."""
        rendered = [str(expression) for expression in self]
        return ' and '.join(rendered)
class TypeField(Node):
    """A field declared with an explicit type, a name and optional subscripts."""

    grammar = Type, Identifier, pypeg2.maybe_some(UnaryExprSubscript)

    def __init__(self, args):
        # First match is the type node, second the declarator name,
        # anything remaining is the list of array subscripts.
        type_node, name_node, *subscripts = args
        self._type = type_node.value
        self._decl = Declarator(name_node, subscripts)

    @property
    def type(self):
        """The field's type value."""
        return self._type

    @type.setter
    def type(self, new_type):
        self._type = new_type

    @property
    def decl(self):
        """The field's declarator (name plus subscripts)."""
        return self._decl

    def __str__(self):
        return '<type-field>{}{}</type-field>'.format(str(self._type), str(self._decl))
def runTest(self):
    """Check the cardinality markers produced by the pypeg2 helpers."""
    some_result = pypeg2.some("thing")
    maybe_some_result = pypeg2.maybe_some("thing")
    optional_result = pypeg2.optional("hello", "world")
    # some -> -2, maybe_some -> -1, optional -> 0 (pypeg2 cardinality codes).
    self.assertEqual(some_result, (-2, "thing"))
    self.assertEqual(maybe_some_result, (-1, "thing"))
    self.assertEqual(optional_result, (0, ("hello", "world")))
class LatchModsAction(BoolArgsListMixin):
    """LatchMods() action as used in interpret { ... } or key description.

    Difference to SetMods unknown??
    """

    arguments = [peg.attr("modifiers", ModifiersArgument), BoolArg]
    grammar = "LatchMods", "(", arguments, peg.maybe_some(",", arguments), ")"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Provide a default so the attribute exists even when the
        # modifiers argument was not present in the parsed input.
        self.modifiers = ModifiersArgument()
class RangeExpr(List):
    """A comma-separated list of range parts, each optionally preceded by an operator."""

    grammar = RangePart, maybe_some(',', optional(Operator), RangePart)

    def _build(self, rr):
        """Delegate building to every child; this node holds no data of its own."""
        for child in self:
            child._build(rr)
class SimpleValue(LeafRule):
    """Represents terminals as plaintext.

    E.g. title top cross section, or title Si-28(p(pol.), n(pol.)).
    """
    class Whitespace(LeafRule):
        grammar = attr('value', whitespace)

    grammar = contiguous(
        SimpleValueUnit,
        maybe_some((optional(Whitespace), some(SimpleValueUnit))))

    def __init__(self, values):
        super(SimpleValue, self).__init__()
        # NOTE(review): ``unicode`` is Python 2 only — confirm this module
        # is not expected to run under Python 3.
        self.value = unicode.strip(''.join([v.value for v in values]))

    @classmethod
    def parse(cls, parser, text, pos):
        """Parse ``text`` as a SimpleValue, un-consuming an implicit keyword query.

        Returns ``(remaining_text, SimpleValue)`` on success, or
        ``(text, SyntaxError)`` on failure.
        """
        def unconsume_and_reconstruct_input():
            """Reconstruct input in case of consuming a keyword query with
            ComplexValue as SimpleValue.

            Un-consuming 3 elements and specifically a Keyword, Whitespace and
            ComplexValue and then reconstructing parser's input text.

            Example:
                Given this query "author foo t 'bar'", r would be:
                r = [SimpleValueUnit("foo"), Whitespace(" "),
                     SimpleValueUnit("t"), Whitespace(" "),
                     SimpleValueUnit("'bar'")]
                thus after this method, r would be
                [SimpleValueUnit("foo"), Whitespace(" ")], while initial text
                will have been reconstructed as "t 'bar' rest_of_the_text".
            """
            reconstructed_terminals = r[:idx - 2]
            remaining_text = ''.join([v.value for v in r[idx - 2:]]) + " " + t
            return remaining_text, reconstructed_terminals

        try:
            t, r = parser.parse(text, cls.grammar)

            # Covering a case of implicit-and when one of the SimpleValue tokens is a ComplexValue.
            # E.g. with the query "author foo t 'bar'", since 'bar' is a ComplexValue, then the previous token is a
            # keyword. This means we have consumed a KeywordQuery (due to 'and' missing).
            found_complex_value = False
            for idx, v in enumerate(r):
                if ComplexValue.regex.match(v.value):
                    # BUG FIX: the closure takes no parameters (it reads r, idx
                    # and t from the enclosing scope); calling it with ``r`` as
                    # an argument raised TypeError whenever a ComplexValue was
                    # actually found.
                    remaining_text, reconstructed_terminals = \
                        unconsume_and_reconstruct_input()
                    found_complex_value = True
                    break

            if found_complex_value:
                result = remaining_text, SimpleValue(reconstructed_terminals)
            else:
                result = t, SimpleValue(r)
        except SyntaxError as e:
            return text, e

        return result
class TagChildren(List):
    """Children of a tag: nested tags, text, inline code or whitespace."""

    grammar = maybe_some(tags + [Text, InlineCode, Whitespace])

    def compose(self, parser, indent=0):
        """Compose every child, each followed by a ',' and a newline."""
        pieces = []
        for child in self:
            pieces.extend((child.compose(parser, indent=indent), ',\n'))
        return ''.join(pieces)
class Query(List, Thing):
    """A sequence of expressions, optionally joined by boolean operators."""

    grammar = expressions, maybe_some(optional(BooleanOperator), expressions)

    def build(self, builder):
        """Build each child, inserting an implicit 'AND' between two
        adjacent non-operator expressions."""
        last_built = None
        for current in self:
            neither_is_operator = not (
                isinstance(last_built, BooleanOperator)
                or isinstance(current, BooleanOperator)
            )
            if last_built and neither_is_operator:
                builder.add('AND')
            current.build(builder)
            last_built = current
class RawBlock(Nary):
    # A fenced block delimited by lines containing only "```".
    # The opening fence may alternatively carry a "!:"-prefixed tag list
    # captured into ``tags``; the body lines are captured into ``content``.
    grammar = (
        [
            ignore(re.compile('^```$', re.M)),
            (
                ignore(re.compile('^```[ \t]*!:', re.M)),
                attr('tags', some([' ', Tag])),
            )
        ],
        attr('content', maybe_some(RawBlockLine)),
        ignore(re.compile('^```$', re.M)),
    )
class CodeBlock(List):
    """A block of code made of packed blocks and plain (non-packed) lines."""

    grammar = maybe_some([PackedBlock, NonPackedLine, line_without_newline])

    def compose(self, parser, attr_of=None):
        """Concatenate all children; plain strings pass through unchanged."""
        parts = [
            part if isinstance(part, str) else part.compose(parser)
            for part in self
        ]
        return ''.join(parts)
class Query(ListRule):
    """The entry-point for the grammar.

    Find keyword is ignored as the current grammar is an augmentation of
    SPIRES and Invenio style syntaxes. It only serves for backward
    compatibility with SPIRES syntax.
    """
    # Ordered alternatives: first try a full statement (with an optional,
    # discarded leading "find"/"fin"/"fi"/"f" keyword and any trailing
    # malformed words), then bare malformed words, then an empty query.
    grammar = [
        (omit(optional(re.compile(r"(find|fin|fi|f)\s", re.IGNORECASE))),
         (Statement, maybe_some(MalformedQueryWords))),
        MalformedQueryWords,
        EmptyQuery,
    ]
class TagChildren(List):
    """Matches valid tag children, which can be other tags, plain text,
    {values} or a mix of all three."""

    grammar = maybe_some(tags + [Text, InlineCode, Whitespace])

    def compose(self, parser, indent=0):
        """Render each child terminated by ',' and a newline."""
        rendered = []
        for node in self:
            rendered.append(node.compose(parser, indent=indent) + ',\n')
        return ''.join(rendered)
class _AdditionExpression:
    """A chain of '+'/'-' operations over multiplication sub-expressions."""

    grammar = _MultiplicationExpression, pypeg2.maybe_some(
        re.compile(r'\+|-'),
        _MultiplicationExpression,
    )

    def __init__(self, operands):
        if len(operands) == 1:
            # Single term: no addition to perform.
            self.value = operands[0].value
            return
        # Pair each operator token with the operand that follows it;
        # True marks '+', False marks '-'.
        op_value_pairs = [
            (operands[i] == '+', operands[i + 1].value)
            for i in range(1, len(operands), 2)
        ]
        self.value = _visitor.visit_addition(operands[0].value, op_value_pairs)
class IdentifierField(Node):
    # Here's the hackish way to parse fields like:
    #
    #     int a
    #     int a[23]
    #     unsigned long b
    #     unsigned long b[23]
    #
    # We scan for identifiers and assume the last one is the declarator
    # name, not part of the type alias. Then come subscripts.
    grammar = pypeg2.some(Identifier), pypeg2.maybe_some(UnaryExprSubscript)

    def __init__(self, args):
        # Split the matched elements into identifier words and subscripts.
        self._type = []
        subscripts = []
        for a in args:
            if type(a) is Identifier:
                self._type.append(a.value)
            elif type(a) is UnaryExprSubscript:
                subscripts.append(a)
        # The last identifier is the declarator name; everything before it
        # forms the (possibly multi-word) type.
        decl_name = self._type.pop()
        self._decl = Declarator(Identifier(decl_name), subscripts)
        # may contain spaces -> not really an identifier; still simpler
        self._type = Identifier(' '.join(self._type))

    @property
    def type(self):
        # The reconstructed (multi-word) type, as an Identifier.
        return self._type

    @type.setter
    def type(self, type):
        self._type = type

    @property
    def decl(self):
        # The declarator: field name plus any array subscripts.
        return self._decl

    def __str__(self):
        return '<id-field>{}{}</id-field>'.format(str(self._type), str(self._decl))
class Attributes(List):
    """Zero or more whitespace-separated attributes of a tag."""

    grammar = optional(ignore(Whitespace), Attribute,
                       maybe_some(ignore(Whitespace), Attribute))

    def compose(self, parser, followed_by_children, indent):
        """Render the attributes as a brace-enclosed block.

        With no attributes, emit an empty '{}' placeholder only when
        children follow; otherwise emit nothing.
        """
        pad = indent * " "
        if not self:
            empty_braces = '{indent}{{}},\n'.format(indent=pad)
            return empty_braces if followed_by_children else ''
        lines = ['{indent}{{\n'.format(indent=pad)]
        for node in self:
            if isinstance(node, str):
                continue
            lines.append(node.compose(parser, indent=indent + 1))
            lines.append('\n')
        lines.append('{indent}}},\n'.format(indent=pad))
        return ''.join(lines)
class CodeBlock(List):
    """Top level grammar representing a block of code, part of which is
    Packed syntax and part of which is not.

    Ideally we would parse the entire Python file with an understanding of
    all the syntax and of where Packed syntax is valid; for the moment we
    just parse it as a mix of non-packed lines and packed blocks.
    """

    # line_without_newline accounts for the last line in the code sample,
    # which might have content but no trailing newline.
    grammar = maybe_some([PackedBlock, NonPackedLine, line_without_newline])

    def compose(self, parser, attr_of=None):
        """Concatenate all children; plain strings pass through unchanged."""
        composed = []
        for item in self:
            if isinstance(item, string_types):
                composed.append(item)
                continue
            composed.append(item.compose(parser))
        return ''.join(composed)
class Structs(List):
    # Zero or more structure definitions.
    grammar = maybe_some(Struct)
class Args(List):
    # One or more comma-separated arguments.
    grammar = Arg, pg.maybe_some(',', Arg)
class MultArgs(List):
    """Semicolon-separated alternatives of argument lists."""

    grammar = Args, pg.maybe_some(';', Args)

    def as_tuple(self):
        """Collapse a single Args to its element tuples; several become a
        ('disjunction', ...) tuple."""
        if len(self) > 1:
            alternatives = tuple(to_tuple(args) for args in self)
            return ('disjunction', *alternatives)
        only_args = tuple(self)[0]
        return tuple(to_tuple(arg) for arg in only_args)
class DispositionParmList(UniqueNamespace):
    """A list of disposition parameters (RFC6266, Section 4.1)."""
    # Parameters are ';'-separated; the extended (ext-parm) form is tried
    # before the plain form in the ordered alternatives.
    grammar = peg.maybe_some(';', [ExtDispositionParm, DispositionParm])
class Body(List):
    """A ';'-separated sequence of expressions."""

    grammar = Expression, pg.maybe_some(';', Expression)

    def as_tuple(self):
        """Convert every child expression to its tuple form."""
        converted = [expression.as_tuple() for expression in self]
        return tuple(converted)
class Methods(Namespace):
    # Zero or more method definitions.
    grammar = maybe_some(Method)
class Services(List):
    # Zero or more service definitions.
    grammar = maybe_some(Service)
PrimaryExpr.grammar = [ Identifier, ConstNumber, LiteralString, ('(', UnaryExpr, ')'), ] PostfixExpr.grammar = ( Identifier, pypeg2.maybe_some( [ (Arrow, Identifier), (Dot, Identifier), UnaryExprSubscript ] ) ) UnaryExpr.grammar = [ PostfixExpr, PrimaryExpr, ] class Declarator(Node): def __init__(self, name, subscripts): self._name = name
class Exceptions(List):
    # Zero or more exception definitions.
    grammar = maybe_some(Exc)
class Annotations(Namespace):
    # Zero or more annotations, each followed by an end-of-line.
    grammar = maybe_some(Annotation, endl)
class SimpleValue(LeafRule):
    """Represents terminals as plaintext.

    E.g. title top cross section, or title Si-28(p(pol.), n(pol.)).
    """
    class Whitespace(LeafRule):
        grammar = attr('value', whitespace)

    grammar = contiguous([SimpleValueUnit, SimpleValueWithColonUnit],
                         maybe_some((optional(Whitespace),
                                     some(SimpleValueUnit))))

    def __init__(self, values):
        super(SimpleValue, self).__init__()
        if isinstance(values, six.string_types):
            # Already plain text: store as-is.
            self.value = values
        else:
            # Join the parsed units and strip surrounding whitespace.
            self.value = six.text_type.strip(''.join([v.value for v in values]))

    @staticmethod
    def unconsume_and_reconstruct_input(remaining_text, recognized_tokens,
                                        complex_value_idx):
        """Reconstruct input in case of consuming a keyword query or a value
        query with ComplexValue as value.

        Un-consuming at most 3 elements and specifically (Keyword,)
        Whitespace and ComplexValue, while also reconstructing parser's
        input text.

        Example:
            Given this query "author foo t 'bar'", r would be:
            r = [SimpleValueUnit("foo"), Whitespace(" "),
                 SimpleValueUnit("t"), Whitespace(" "),
                 SimpleValueUnit("'bar'")]
            thus after this method, r would be
            [SimpleValueUnit("foo"), Whitespace(" ")], while initial text
            will have been reconstructed as "t 'bar' rest_of_the_text".
        """
        # Default slicing index: i.e. at most 3 elements will be unconsumed,
        # Keyword, Whitespace and ComplexValue.
        slicing_start_idx = 2

        # Check whether the 3rd element from the end is an InspireKeyword.
        # If not, a Value query with ComplexValue was consumed, so only
        # un-consume Whitespace and ComplexValue (2 elements).
        if not INSPIRE_PARSER_KEYWORDS.get(
                recognized_tokens[complex_value_idx - slicing_start_idx].value,
                None):
            slicing_start_idx = 1

        # Keep everything before the un-consumed span.
        reconstructed_terminals = recognized_tokens[:complex_value_idx -
                                                    slicing_start_idx]
        # Prepend the un-consumed tokens' text back onto the remaining input.
        reconstructed_text = '{} {}'.format(
            ''.join([
                token.value
                for token in recognized_tokens[complex_value_idx -
                                               slicing_start_idx:]
            ]), remaining_text)
        return reconstructed_text, reconstructed_terminals

    @classmethod
    def parse(cls, parser, text, pos):
        # Returns (remaining_text, SimpleValue) on success, or
        # (text, SyntaxError) on failure.
        try:
            remaining_text, recognized_tokens = parser.parse(text, cls.grammar)

            # Covering a case of implicit-and when one of the SimpleValue
            # tokens is a ComplexValue.
            # This means we either have a KeywordQuery or a ValueQuery with a
            # ComplexValue.
            # E.g. "author foo t 'bar'", since 'bar' is a ComplexValue, then
            # the previous token is a keyword. This means we have consumed a
            # KeywordQuery (due to 'and' missing).
            # Same goes for "author foo 'bar'", but in this case we have a
            # ValueQuery with a ComplexValue.
            found_complex_value = False
            for idx, token in enumerate(recognized_tokens):
                if ComplexValue.regex.match(token.value):
                    reconstructed_text, reconstructed_terminals = \
                        cls.unconsume_and_reconstruct_input(
                            remaining_text, recognized_tokens, idx)
                    found_complex_value = True
                    break

            if found_complex_value:
                result = reconstructed_text, SimpleValue(
                    reconstructed_terminals)
            else:
                result = remaining_text, SimpleValue(recognized_tokens)
        except SyntaxError as e:
            return text, e

        return result
class Enums(List):
    # Zero or more enum definitions.
    grammar = maybe_some(Enum)
omit(Literal('|')), attr('op', SimpleQuery), ), ] Query.grammar = attr('children', ( [ NotQuery, ParenthesizedQuery, SimpleQuery, ], maybe_some(( omit(_), [ AndQuery, OrQuery, ImplicitAndQuery, ] )), )) class Main(UnaryRule): grammar = [ (omit(_), attr('op', Query), omit(_)), attr('op', EmptyQueryRule), ] # pylint: enable=C0321,R0903
(omit(Whitespace), attr('op', SpiresSimpleQuery)), (omit(_), attr('op', SpiresParenthesizedQuery)), (omit(Whitespace), attr('op', SpiresValueQuery)), ] ) SpiresQuery.grammar = attr('children', ( [ SpiresParenthesizedQuery, SpiresSimpleQuery, ], maybe_some(( omit(_), [ SpiresNotQuery, SpiresAndQuery, SpiresOrQuery, ] )), )) class NestableKeyword(LeafRule): grammar = attr('value', [ re.compile('refersto', re.I), re.compile('citedby', re.I), ]) class GreaterQuery(UnaryRule): grammar = (
(omit(Literal("+")), attr("op", SimpleQuery)), ] class ImplicitAndQuery(UnaryRule): grammar = [attr("op", ParenthesizedQuery), attr("op", SimpleQuery)] class OrQuery(UnaryRule): grammar = [ (omit(Or), [(omit(Whitespace), attr("op", SimpleQuery)), (omit(_), attr("op", ParenthesizedQuery))]), (omit(Literal("|")), attr("op", SimpleQuery)), ] Query.grammar = attr( "children", ([ParenthesizedQuery, SimpleQuery], maybe_some((omit(_), [NotQuery, AndQuery, OrQuery, ImplicitAndQuery]))), ) class EmptyQueryRule(LeafRule): grammar = attr("value", re.compile(r"\s*")) class Main(UnaryRule): grammar = [(omit(_), attr("op", Query), omit(_)), attr("op", EmptyQueryRule)] # pylint: enable=C0321,R0903
(omit(_), attr('op', SpiresParenthesizedQuery)), (omit(Whitespace), attr('op', SpiresValueQuery)), (omit(re.compile(r".*", re.I)), attr('op', EmptyQueryRule)), ] ) SpiresQuery.grammar = attr('children', ( [ SpiresParenthesizedQuery, SpiresSimpleQuery, ], maybe_some(( omit(_), [ SpiresNotQuery, SpiresAndQuery, SpiresOrQuery, ] )), )) SpiresKeywordQuery.grammar = [ ( attr('left', NestableKeyword), omit(_, Literal(':'), _), attr('right', [ SpiresParenthesizedQuery, SpiresSimpleQuery, ValueQuery ]),
class Unions(List):
    # NOTE(review): this matches Struct, not a Union rule — possibly a
    # copy-paste from Structs; confirm this is intentional.
    grammar = maybe_some(Struct)
def runTest(self):
    """maybe_some of a non-matching pattern consumes nothing and yields []."""
    parser = pypeg2.Parser()
    digit_pattern = re.compile(r"\d", re.U)
    outcome = parser.parse("hello, world", pypeg2.maybe_some(digit_pattern))
    # No digits in the input: full text remains, empty result list.
    self.assertEqual(outcome, ('hello, world', []))
(RelationalExpression, re.compile('[<>]=?')), (EqualityExpression, re.compile('==|\!=')), (BitwiseAndExpression, re.compile('&')), (BitwiseXorExpression, re.compile('^')), (BitwiseOrExpression, re.compile('\|')), (LogicalAndExpression, re.compile('&&')), (LogicalXorExpression, re.compile('^^')), (LogicalOrExpression, re.compile('\|\|'))] binary_expression_or_less = [*unary_expression_or_less] for BinaryExpressionTemp, binary_regex in order_of_operations: binary_expression_or_less = [ BinaryExpressionTemp, *binary_expression_or_less ] BinaryExpressionTemp.grammar = ( attr('comment1', maybe_some([inline_comment, endline_comment])), attr('operand1', binary_expression_or_less[1:]), attr('comment2', maybe_some([inline_comment, endline_comment])), blank, attr('operator', binary_regex), blank, attr('comment3', maybe_some([inline_comment, endline_comment])), attr('operand2', binary_expression_or_less), attr('comment4', maybe_some([inline_comment, endline_comment])), ) ternary_expression_or_less = [TernaryExpression, *binary_expression_or_less] TernaryExpression.grammar = (attr('operand1', binary_expression_or_less), '?', blank, attr('operand2', ternary_expression_or_less), blank, ':',
def compose(self, parser: Any, grammar: Any = None, attr_of: str = None) -> str:
    """Return the Condition rendered as a string.

    Nested Condition operands are wrapped in parentheses; when no operator
    is present, only the left operand is rendered.

    :param parser: Parser instance
    :param grammar: Grammar
    :param attr_of: Attribute of...
    """
    # Left operand: parenthesize only if it is itself a nested Condition.
    if type(self.left) is Condition:
        left = "({0})".format(parser.compose(self.left, grammar=grammar, attr_of=attr_of))
    else:
        left = parser.compose(self.left, grammar=grammar, attr_of=attr_of)
    # The operator (and right operand) are optional on this node.
    if getattr(self, 'op', None):
        if type(self.right) is Condition:
            right = "({0})".format(parser.compose(self.right, grammar=grammar, attr_of=attr_of))
        else:
            right = parser.compose(self.right, grammar=grammar, attr_of=attr_of)
        op = parser.compose(self.op, grammar=grammar, attr_of=attr_of)
        result = "{0} {1} {2}".format(left, op, right)
    else:
        result = left
    return result

# A condition is a left operand, optionally followed by an operator and a
# right operand; operands may themselves be parenthesized Conditions.
Condition.grammar = contiguous(
    attr('left', [SIG, XHX, CSV, CLTV, ('(', Condition, ')')]),
    maybe_some(whitespace, attr('op', Operator), whitespace,
               attr('right', [SIG, XHX, CSV, CLTV, ('(', Condition, ')')])))