class Block(Nary):
    """Fenced block delimited by lines consisting of four backticks.

    The opening fence is either a bare four-backtick line or a four-backtick
    line followed by ``!:`` and a tag list stored in ``tags``.  The body is
    stored in ``content``.
    """

    grammar = (
        # Opening fence: plain, or fence + "!:" + tags.
        [
            ignore(re.compile('^````$', re.M)),
            (
                ignore(re.compile('^````[ \t]*!:', re.M)),
                attr('tags', some([' ', Tag])),
            )
        ],
        # Body: a repetition of newline literals and BlockLine items.
        attr('content', some(['\n', BlockLine])),
        # NOTE: it's ok for a block to eat newlines
        # Closing fence.
        ignore(re.compile('^````$', re.M)),
    )
class Selection(List):
    """Braced selection with an optional number before and after the braces.

    Grammar: optional ``Number``, ``{``, ``Expression`` item(s), ``}``,
    optional ``Number``.
    """

    grammar = pg.optional(Number), '{', pg.some(Expression), '}', pg.optional(Number)

    def as_tuple(self):
        """Return ``('selection', down, up, (exprs,))``.

        ``down`` is the leading child when it is an ``int`` (default ``0``);
        ``up`` is the trailing child when it is an ``int`` (default ``None``).
        ``exprs`` is the single remaining child.

        Bound presence is decided by type, not by truthiness, so a bound of
        ``0`` is treated as a real bound instead of as "missing".
        """
        children = tuple(map(to_tuple, self))
        assert len(children) in {1, 2, 3}

        has_down = isinstance(children[0], int)
        has_up = isinstance(children[-1], int)
        down = children[0] if has_down else 0
        up = children[-1] if has_up else None

        # Exactly one expression child is expected besides the bounds.
        # NOTE(review): the grammar allows several Expression items; whether
        # `to_tuple`/the parser groups them into one child is not visible here.
        assert len(children) == 1 + int(has_down) + int(has_up)
        exprs = children[1] if has_down else children[0]
        return 'selection', down, up, (exprs,)
def test_multiline_comment(self):
    """Parsing ``self.multiline`` as comments yields each line without ``#``.

    The expected value is built as a real list so that it also works where
    ``map`` returns an iterator (which has no ``len`` and never compares
    equal to a list).
    """
    p = parse(self.multiline, some(Comment))
    expected = [
        str(line).replace("#", "")
        for line in self.multiline.split("\n")
    ]
    self.assertEqual(p, expected)
class SmartyLanguageMain(Rule):
    """Top-level rule: a repetition (``some``) over the alternatives below.

    Unlike a plain statement list, the alternatives end with the delimiter
    rules ``LeftDelimTag``, ``RightDelimTag`` and ``LeftDelim``.
    """

    # NOTE(review): alternatives are presumably tried in listed order
    # (ordered choice) -- keep the order when adding new statements.
    grammar = some([
        LiteralStatement,
        TranslationStatement,
        IfStatement,
        ForStatement,
        IncludeStatement,
        ExtendsStatement,
        BlockStatement,
        AssignStatement,
        FunctionStatement,
        CommentStatement,
        SimpleTag,
        PrintStatement,
        Content,
        LeftDelimTag,
        RightDelimTag,
        LeftDelim
    ])
class String(List):
    """List node made of a repetition of ``Part`` items."""

    grammar = some(Part)

    def _build(self, rr):
        """Forward ``rr`` to the ``_build`` method of every contained item."""
        for o in self:
            o._build(rr)
        # Explicit bare return: the method produces no value.
        return
def runTest(self):
    """Check the tuples produced by ``some``, ``maybe_some`` and ``optional``.

    Each helper is expected to return ``(cardinality, thing)`` with the
    cardinality markers -2, -1 and 0 respectively.
    """
    one_or_more = pypeg2.some("thing")
    any_number = pypeg2.maybe_some("thing")
    at_most_one = pypeg2.optional("hello", "world")

    expectations = (
        (one_or_more, (-2, "thing")),
        (any_number, (-1, "thing")),
        (at_most_one, (0, ("hello", "world"))),
    )
    for actual, wanted in expectations:
        self.assertEqual(actual, wanted)
class Head(List):
    """Rule head: a ``Selection``, a ``NamedTerm`` or ``;``-separated terms."""

    grammar = [Selection, NamedTerm, (NamedTerm, pg.some(';', NamedTerm))]

    def as_tuple(self):
        """Return the tuple form of the single child.

        When the child contains ``';'`` the result is wrapped as
        ``('disjunction', <child tuple>)``.
        """
        assert len(self) == 1
        only_child = self[0]
        if ';' not in only_child:
            return only_child.as_tuple()
        return 'disjunction', only_child.as_tuple()
class SimpleValue(LeafRule):
    """Represents terminals as plaintext.

    E.g. title top cross section, or title Si-28(p(pol.), n(pol.)).
    """

    class Whitespace(LeafRule):
        grammar = attr('value', whitespace)

    grammar = contiguous(
        SimpleValueUnit,
        maybe_some((optional(Whitespace), some(SimpleValueUnit))))

    def __init__(self, values):
        """Join the ``value`` of every recognized token and strip the result."""
        super(SimpleValue, self).__init__()
        self.value = unicode.strip(''.join([v.value for v in values]))

    @classmethod
    def parse(cls, parser, text, pos):
        """Parse ``text`` with ``cls.grammar`` and return ``(rest, node)``.

        On ``SyntaxError`` the pair ``(text, error)`` is returned instead of
        raising.  ``pos`` is accepted for interface compatibility and is not
        used here.
        """

        def unconsume_and_reconstruct_input(remaining, tokens, complex_value_idx):
            """Reconstruct input in case of consuming a keyword query with ComplexValue as SimpleValue.

            Un-consuming 3 elements and specifically a Keyword, Whitespace and
            ComplexValue and then reconstructing parser's input text.

            Example:
                Given this query "author foo t 'bar'", tokens would be:
                    [SimpleValueUnit("foo"), Whitespace(" "), SimpleValueUnit("t"),
                     Whitespace(" "), SimpleValueUnit("'bar'")]
                thus after this method the kept tokens would be
                [SimpleValueUnit("foo"), Whitespace(" ")], while the text will
                have been reconstructed as "t 'bar' rest_of_the_text".
            """
            # NOTE(review): when the complex value sits at index 0 or 1 the
            # slice bound goes negative and wraps around -- confirm whether
            # the grammar can ever produce that.
            split_at = complex_value_idx - 2
            reconstructed_terminals = tokens[:split_at]
            remaining_text = ''.join([v.value for v in tokens[split_at:]]) + " " + remaining
            return remaining_text, reconstructed_terminals

        try:
            t, r = parser.parse(text, cls.grammar)

            # Covering a case of implicit-and when one of the SimpleValue tokens is a ComplexValue.
            # E.g. with the query "author foo t 'bar'", since 'bar' is a ComplexValue, then the previous token is a
            # keyword. This means we have consumed a KeywordQuery (due to 'and' missing).
            found_complex_value = False
            for idx, v in enumerate(r):
                if ComplexValue.regex.match(v.value):
                    # The helper takes its inputs explicitly; the previous
                    # zero-parameter definition did not match this call.
                    remaining_text, reconstructed_terminals = unconsume_and_reconstruct_input(
                        t, r, idx)
                    found_complex_value = True
                    break

            if found_complex_value:
                result = remaining_text, SimpleValue(reconstructed_terminals)
            else:
                result = t, SimpleValue(r)

        except SyntaxError as e:
            return text, e

        return result
class Inline(Nullary):
    """Inline element written as ``[tags][content]``.

    ``tags`` holds the items matched inside the first bracket pair and
    ``content`` the text matched by the ``r_inline`` pattern inside the
    second pair.
    """

    grammar = (
        '[',
        attr('tags', some([' ', Tag])),
        ']',
        '[',
        attr('content', re.compile(r_inline, re.M)),
        ']',
    )
class Enum(Node):
    """``enum`` declaration: optional name, integer type and enumerators."""

    grammar = (
        'enum',
        pypeg2.optional(EnumName),
        ':',
        [
            pypeg2.some(Identifier),
            Integer,
        ],
        '{',
        Enumerators,
        '}'
    )

    def __init__(self, args):
        """Build the node from the parsed children in ``args``.

        A leading ``EnumName`` child is removed from ``args`` and becomes the
        name.  The integer type is either the ``Integer`` child itself or an
        ``Identifier`` made of the space-joined identifier values.  The last
        child is the enumerator list.
        """
        self._name = None
        if type(args[0]) is EnumName:
            name_node = args.pop(0)
            self._name = name_node.value

        first = args[0]
        if type(first) is Integer:
            self._int_type = first
        else:
            # Everything but the trailing enumerators is part of the type.
            words = [ident.value for ident in args[:-1]]
            self._int_type = Identifier(' '.join(words))

        self._enumerators = args[-1]

    @property
    def name(self):
        """Enum name, or ``None`` when the declaration is anonymous."""
        return self._name

    @property
    def int_type(self):
        """Integer type node of the enum."""
        return self._int_type

    @int_type.setter
    def int_type(self, int_type):
        self._int_type = int_type

    @property
    def enumerators(self):
        """Enumerators node (last parsed child)."""
        return self._enumerators

    def __str__(self):
        """Render as ``<enum>`` + name (if any) + type + enumerators + ``</enum>``."""
        pieces = ['<enum>']
        if self._name is not None:
            pieces.append(str(self._name))
        pieces.append(str(self._int_type))
        pieces.append(str(self._enumerators))
        pieces.append('</enum>')
        return ''.join(pieces)
class WordsExpression(Expression, List):
    """Expression made of a repetition of ``Word`` items."""

    grammar = separated(some(Word))

    def build(self, builder):
        """Add a ``MATCH`` sub-select to ``builder``.

        The words are joined with single spaces and passed as the value for
        the ``?`` placeholder, i.e. they are not interpolated into the SQL
        text.
        """
        builder.add(''' hash IN ( SELECT docid FROM transaction_search WHERE description MATCH ? )''', ' '.join(self))
class RawBlock(Nary):
    """Fenced block delimited by lines consisting of three backticks.

    The opening fence is either a bare three-backtick line or a
    three-backtick line followed by ``!:`` and a tag list stored in
    ``tags``.  The body lines are stored in ``content``.
    """

    grammar = (
        # Opening fence: plain, or fence + "!:" + tags.
        [
            ignore(re.compile('^```$', re.M)),
            (
                ignore(re.compile('^```[ \t]*!:', re.M)),
                attr('tags', some([' ', Tag])),
            )
        ],
        # Body: a ``maybe_some`` repetition of RawBlockLine items.
        attr('content', maybe_some(RawBlockLine)),
        # Closing fence.
        ignore(re.compile('^```$', re.M)),
    )
class Integer(_List, Node):
    """``integer { ... }`` block holding ``ValueAssignment`` items."""

    grammar = 'integer', '{', pypeg2.some((ValueAssignment, ';')), '}'

    def __init__(self, assignments):
        """Store the parsed assignments via the parent initializer."""
        super().__init__(assignments)

    def __str__(self):
        """Render every assignment between ``<integer>`` tags."""
        rendered = [str(assignment) for assignment in self]
        return '<integer>' + ''.join(rendered) + '</integer>'
class FloatingPoint(_List, Node):
    """``floating_point { ... }`` block holding ``ValueAssignment`` items."""

    grammar = 'floating_point', '{', pypeg2.some((ValueAssignment, ';')), '}'

    def __init__(self, assignments):
        """Store the parsed assignments via the parent initializer."""
        super().__init__(assignments)

    def __str__(self):
        """Render every assignment between ``<floating-point>`` tags."""
        rendered = [str(assignment) for assignment in self]
        return '<floating-point>' + ''.join(rendered) + '</floating-point>'
class IdentifierField(Node):
    """Field whose type is spelled with plain identifiers."""

    # Here's the hackish way to parse fields like:
    #
    #     int a
    #     int a[23]
    #     unsigned long b
    #     unsigned long b[23]
    #
    # We scan for identifiers and assume the last one is the declarator
    # name, not part of the type alias. Then come subscripts.
    grammar = pypeg2.some(Identifier), pypeg2.maybe_some(UnaryExprSubscript)

    def __init__(self, args):
        """Split ``args`` into type words, declarator name and subscripts."""
        words = [node.value for node in args if type(node) is Identifier]
        subscripts = [node for node in args if type(node) is UnaryExprSubscript]

        # The last identifier is the declared name; the rest spell the type.
        decl_name = words.pop()
        self._decl = Declarator(Identifier(decl_name), subscripts)

        # may contain spaces -> not really an identifier; still simpler
        self._type = Identifier(' '.join(words))

    @property
    def type(self):
        """Type of the field, as an ``Identifier`` of space-joined words."""
        return self._type

    @type.setter
    def type(self, type):
        self._type = type

    @property
    def decl(self):
        """Declarator built from the last identifier and the subscripts."""
        return self._decl

    def __str__(self):
        """Render type and declarator between ``<id-field>`` tags."""
        type_text = str(self._type)
        decl_text = str(self._decl)
        return '<id-field>' + type_text + decl_text + '</id-field>'
class TypeAlias(Node):
    """``typealias <type> := <identifiers>`` declaration."""

    grammar = 'typealias', Type, ':=', pypeg2.some(Identifier)

    def __init__(self, args):
        """Take the aliased type from the first child and the name from the rest.

        The first child is removed from ``args``; the remaining identifier
        values are joined with spaces to form the alias name.
        """
        type_node = args.pop(0)
        self._type = type_node.value

        # may contain spaces -> not really an identifier; still simpler
        name_words = [ident.value for ident in args]
        self._name = Identifier(' '.join(name_words))

    @property
    def type(self):
        """Aliased type (the ``value`` of the parsed ``Type`` child)."""
        return self._type

    @property
    def name(self):
        """Alias name as an ``Identifier``."""
        return self._name

    def __str__(self):
        """Render type and name between ``<typealias>`` tags."""
        type_text = str(self._type)
        name_text = str(self._name)
        return '<typealias>' + type_text + name_text + '</typealias>'
class Heading(Nary):
    """Heading: a run of ``#`` characters followed by inline or plain text.

    ``heading`` keeps the matched ``#`` run, the spaces after it are
    dropped, and ``content`` holds the ``Inline`` / ``HeadingText`` items.
    """

    grammar = (attr('heading', re.compile(r'#+', re.M)),
               ignore(re.compile(' *', re.M)),
               attr('content', some([Inline, HeadingText])))
class BlockLine(Nary):
    """One line inside a block.

    The line must first match ``r_block_line_condition`` (matched text is
    ignored); ``content`` then holds the ``Inline`` / ``BlockText`` items.
    """

    grammar = (ignore(re.compile(r_block_line_condition, re.M)),
               attr('content', some([Inline, BlockText])))
class Table(Nary):
    """Table: an optional tag line followed by newline-separated table lines.

    The optional first line starts with ``|``, optional blanks and ``!:``,
    then a tag list stored in ``tags``.  ``content`` holds the first
    ``TableLine`` and any further ones, each preceded by a newline.
    """

    grammar = (
        optional(ignore(re.compile(r'^\|[ \t]*!:', re.M)),
                 attr('tags', some([' ', Tag])), '\n'),
        attr('content', (TableLine, maybe_some('\n', TableLine))),
    )
def runTest(self):
    """Parsing text with no leading digit against ``some(\\d)`` must fail."""
    digits = pypeg2.some(re.compile(r"\d", re.U))
    parser = pypeg2.Parser()
    with self.assertRaises(SyntaxError):
        parser.parse("hello, world", digits)
class MalformedQueryWords(ListRule):
    """Represents queries that weren't recognized by the main parsing branch of Statements."""

    # A repetition of runs of non-whitespace characters.
    grammar = some(re.compile(r"[^\s]+", re.UNICODE))

    def __init__(self, children):
        """Keep the matched words in ``self.children``."""
        self.children = children
class Program(List):
    """Whole program: a repetition of statements, each followed by ``.``.

    A statement is one of ``Constraint``, ``Rule`` or ``Head``.
    """

    grammar = pg.some([Constraint, Rule, Head], '.')
class SpiresSimpleValueUnit(LeafRule): grammar = [ re.compile(r"[^\s\)\(]+"), (re.compile(r'\('), SpiresSimpleValue, re.compile(r'\)')), ] def __init__(self, args): super(SpiresSimpleValueUnit, self).__init__() if isinstance(args, string_types): self.value = args else: self.value = args[0] + args[1].value + args[2] SpiresSimpleValue.grammar = some(SpiresSimpleValueUnit) class SpiresSmartValue(UnaryRule): @classmethod def parse(cls, parser, text, pos): # pylint: disable=W0613 """Match simple values excluding some Keywords like 'and' and 'or'""" if not text.strip(): return text, SyntaxError("Invalid value") class Rule(object): grammar = attr('value', SpiresSimpleValue), omit(re.compile(".*")) try: tree = pypeg2.parse(text, Rule, whitespace="") except SyntaxError:
class List(Nary):
    """Bulleted list: optional tag line followed by newline-separated items.

    The list must first match ``r_list_condition`` (matched text is
    ignored).  The optional tag line is an indented ``*`` followed by
    ``!:`` and a tag list stored in ``tags``.  ``content`` holds the first
    ``ListLine`` and any further ones, each preceded by a newline.
    """

    grammar = (ignore(re.compile(r_list_condition, re.M)),
               optional(ignore(re.compile(r'^(\t| {4})+\*[ \t]*!:', re.M)),
                        attr('tags', some([' ', Tag])), '\n'),
               attr('content', (ListLine, maybe_some('\n', ListLine))))
class SimpleValueUnit(LeafRule):
    """Smallest piece of a simple value.

    Either a run of characters that are not whitespace, parentheses or
    ``:``, or a parenthesized ``SimpleValue``.
    """

    grammar = [
        re.compile(r"[^\s\)\(:]+"),
        (re.compile(r'\('), SimpleValue, re.compile(r'\)')),
    ]

    def __init__(self, args):
        """Store the matched text in ``self.value``.

        ``args`` is either the matched string, or the three parts of the
        parenthesized form, which are concatenated back together.
        """
        super(SimpleValueUnit, self).__init__()
        if isinstance(args, basestring):
            self.value = args
        else:
            self.value = args[0] + args[1].value + args[2]


# Assigned after the class because the two grammars refer to each other.
SimpleValue.grammar = some(SimpleValueUnit)


class SpiresSimpleValue(LeafRule):
    """Simple value whose text is the concatenation of its units' values."""

    def __init__(self, values):
        super(SpiresSimpleValue, self).__init__()
        self.value = "".join(v.value for v in values)


class SpiresSimpleValueUnit(LeafRule):
    """Like ``SimpleValueUnit`` but ``:`` is allowed in the plain-text form."""

    grammar = [
        re.compile(r"[^\s\)\(]+"),
        (re.compile(r'\('), SpiresSimpleValue, re.compile(r'\)')),
    ]
class Paragraph(Nary):
    """Paragraph of inline elements and plain paragraph text.

    The paragraph must first match ``r_paragraph_condition`` (matched text
    is ignored); ``content`` holds the ``Inline`` / ``ParagraphText`` items.
    """

    # Candidate: ^(?!(```|````|\t+\*|( {4})+\*|\|))
    # This way, the regex only need to not match [, ] and consecutive \n
    grammar = (ignore(re.compile(r_paragraph_condition, re.M)),
               attr('content', some([Inline, ParagraphText])))
class ListLine(Nary):
    """One list item line.

    ``indentation`` keeps the leading run of tabs / four-space groups, the
    ``*`` bullet and following spaces are dropped, and ``content`` holds
    the ``Inline`` / ``ListLineText`` items.
    """

    grammar = (
        attr('indentation', re.compile(r'^(\t| {4})+', re.M)),
        ignore(re.compile(r'\* *', re.M)),
        attr('content', some([Inline, ListLineText])),
    )
def runTest(self):
    """``some(\\w)`` consumes the leading word one character at a time.

    The parser is expected to return the unparsed remainder together with
    the list of matched characters.
    """
    word_chars = pypeg2.some(re.compile(r"\w", re.U))
    outcome = pypeg2.Parser().parse("hello, world", word_chars)
    wanted = (', world', ['h', 'e', 'l', 'l', 'o'])
    self.assertEqual(outcome, wanted)
class SimpleValueUnit(LeafRule): grammar = [ re.compile(r"[^\s\)\(:]+"), (re.compile(r'\('), SimpleValue, re.compile(r'\)')), ] def __init__(self, args): super(SimpleValueUnit, self).__init__() if isinstance(args, string_types): self.value = args else: self.value = args[0] + args[1].value + args[2] SimpleValue.grammar = some(SimpleValueUnit) class SimpleRangeValue(LeafRule): grammar = attr('value', re.compile(r"([^\s\)\(-]|-+[^\s\)\(>])+")) class RangeValue(UnaryRule): grammar = attr('op', [DoubleQuotedString, SimpleRangeValue]) class RangeOp(BinaryRule): grammar = ( attr('left', RangeValue), Literal('->'), attr('right', RangeValue)
# Each alternatives list is created first and the operator grammars are
# assigned afterwards, because every operator grammar refers back to the
# list that contains it.

# Additive level: `+` and `-`, falling back to the multiplicative level.
additive_expression = [Add, Subtract, multiplicative_expression]
Add.grammar = multiplicative_expression, "+", additive_expression
Subtract.grammar = multiplicative_expression, "-", additive_expression

# Comparison level: `>` and `<`, falling back to the additive level.
conditional_expression = [GreaterThan, LessThan, additive_expression]
GreaterThan.grammar = additive_expression, ">", conditional_expression
LessThan.grammar = additive_expression, "<", conditional_expression

# Logical level: `&&` and `||`, falling back to the comparison level.
logical_expression = [And, Or, conditional_expression]
And.grammar = conditional_expression, "&&", logical_expression
Or.grammar = conditional_expression, "||", logical_expression

Expression.grammar = logical_expression

# Statements.  Note that `if` always requires an `else` branch.
Assign.grammar = Variable, "=", Expression, ";"
statement = [Assign, If, While]
If.grammar = K("if"), "(", logical_expression, ")", "{", Block, "}", \
    K("else"), "{", Block, "}"
While.grammar = K("while"), "(", logical_expression, ")", "{", Block, "}"

# A block is a repetition of statements; a program is a single block.
Block.grammar = some(statement)
Program.grammar = Block
""" return compose(self[0]) def to_simple(self): """Generate corresponding simple object that can be evaluated.""" return self[0].to_simple() Number.grammar = regex(r"(\+|\-)?[0-9]+(\.[0-9]+)?") Boolean.grammar = regex(r"(true|false)") Variable.grammar = Symbol term_expression = [Number, Boolean, Variable] multiplicative_expression = [Multiply, Divide, term_expression] Multiply.grammar = term_expression, "*", multiplicative_expression Divide.grammar = term_expression, "/", multiplicative_expression additive_expression = [Add, Subtract, multiplicative_expression] Add.grammar = multiplicative_expression, "+", additive_expression Subtract.grammar = multiplicative_expression, "-", additive_expression Expression.grammar = additive_expression Assign.grammar = Variable, "=", Expression statement = [Assign] Block.grammar = some(statement) Program.grammar = Block
# -*- coding: utf-8 -*-

# May you recognize your weaknesses and share your strengths.
# May you share freely, never taking more than you give.
# May you find love and love everyone you find.

import re

import pypeg2


class Dialog(str):
    """A line of dialog: double-quoted words followed by an end of line."""

    grammar = '"', pypeg2.maybe_some(pypeg2.word), '"', pypeg2.endl


class Label(pypeg2.List):
    """A named label followed by its instructions (grammar assigned below)."""

    pass


# An instruction is either a dialog line or a (nested) label.
instruction = [Dialog, Label]

# We have to delay this definition because it's circular.
Label.grammar = 'label', pypeg2.name(), ':', pypeg2.endl, pypeg2.some(instruction)


def parse(inFile):
    """Read the file at path ``inFile`` and parse it as an ``instruction``.

    The file is opened in a ``with`` block so the handle is closed as soon
    as its contents have been read, even if reading fails.

    Returns whatever ``pypeg2.parse`` returns for the file's text.
    """
    with open(inFile) as source:
        text = source.read()
    return pypeg2.parse(text, instruction)
class SimpleValueUnit(LeafRule):
    """Smallest piece of a simple value.

    Either a run of characters that are not whitespace, parentheses or
    ``:``, or a parenthesized ``SimpleValue``.
    """

    grammar = [
        re.compile(r"[^\s\)\(:]+"),
        (re.compile(r'\('), SimpleValue, re.compile(r'\)')),
    ]

    def __init__(self, args):
        """Store the matched text in ``self.value``.

        A plain string is kept as is; otherwise the three parts of the
        parenthesized form are concatenated back together.
        """
        super(SimpleValueUnit, self).__init__()
        self.value = (
            args
            if isinstance(args, string_types)
            else args[0] + args[1].value + args[2]
        )


# Assigned after the class because the two grammars refer to each other.
SimpleValue.grammar = some(SimpleValueUnit)


class SimpleRangeValue(LeafRule):
    """Range endpoint text; a dash run may not be followed by ``>``."""

    grammar = attr('value', re.compile(r"([^\s\)\(-]|-+[^\s\)\(>])+"))


class RangeValue(UnaryRule):
    """Range endpoint: a double-quoted string or a ``SimpleRangeValue``."""

    grammar = attr('op', [DoubleQuotedString, SimpleRangeValue])


class RangeOp(BinaryRule):
    """Range expression ``left -> right``."""

    grammar = (
        attr('left', RangeValue),
        Literal('->'),
        attr('right', RangeValue),
    )
class TableCell(Nary):
    """Table cell; ``content`` holds ``Inline`` / ``TableCellText`` items."""

    grammar = attr('content', some([Inline, TableCellText]))
class SimpleValue(LeafRule):
    """Represents terminals as plaintext.

    E.g. title top cross section, or title Si-28(p(pol.), n(pol.)).
    """

    class Whitespace(LeafRule):
        grammar = attr('value', whitespace)

    grammar = contiguous(
        [SimpleValueUnit, SimpleValueWithColonUnit],
        maybe_some((optional(Whitespace), some(SimpleValueUnit))),
    )

    def __init__(self, values):
        """Store a plain string as is, or the stripped join of token values."""
        super(SimpleValue, self).__init__()
        if isinstance(values, six.string_types):
            self.value = values
            return
        joined = ''.join(v.value for v in values)
        self.value = six.text_type.strip(joined)

    @staticmethod
    def unconsume_and_reconstruct_input(remaining_text, recognized_tokens, complex_value_idx):
        """Give back the tokens that belong to a following query.

        Used when a keyword query, or a value query, whose value is a
        ComplexValue was swallowed as part of this simple value.  At most
        three tokens are un-consumed: (Keyword,) Whitespace and ComplexValue.

        Example:
            For "author foo t 'bar'" the recognized tokens are
                [SimpleValueUnit("foo"), Whitespace(" "), SimpleValueUnit("t"),
                 Whitespace(" "), SimpleValueUnit("'bar'")]
            The kept tokens become [SimpleValueUnit("foo"), Whitespace(" ")]
            and the text is rebuilt as "t 'bar' rest_of_the_text".

        Returns:
            ``(reconstructed_text, kept_tokens)``.
        """
        # Look two tokens before the complex value: if that token is a known
        # keyword, a keyword query was consumed and the keyword goes back
        # too; otherwise only one token before the complex value goes back.
        candidate = recognized_tokens[complex_value_idx - 2]
        is_keyword = INSPIRE_PARSER_KEYWORDS.get(candidate.value, None)
        split_at = complex_value_idx - (2 if is_keyword else 1)

        kept_tokens = recognized_tokens[:split_at]
        given_back = ''.join([token.value for token in recognized_tokens[split_at:]])
        return '{} {}'.format(given_back, remaining_text), kept_tokens

    @classmethod
    def parse(cls, parser, text, pos):
        """Parse ``text`` with ``cls.grammar`` and return ``(rest, node)``.

        If one of the recognized tokens matches ``ComplexValue.regex``, the
        input was over-consumed because of an implicit "and": either a
        keyword query ("author foo t 'bar'") or a value query
        ("author foo 'bar'") follows.  The extra tokens are pushed back into
        the returned text.  On ``SyntaxError`` the pair ``(text, error)`` is
        returned instead of raising.
        """
        try:
            remaining_text, recognized_tokens = parser.parse(text, cls.grammar)

            for idx, token in enumerate(recognized_tokens):
                if not ComplexValue.regex.match(token.value):
                    continue
                # Only the first complex value matters.
                reconstructed_text, reconstructed_terminals = cls.unconsume_and_reconstruct_input(
                    remaining_text, recognized_tokens, idx)
                return reconstructed_text, SimpleValue(reconstructed_terminals)

            return remaining_text, SimpleValue(recognized_tokens)
        except SyntaxError as e:
            return text, e
class TableLine(Nary):
    """One table row.

    The row must first match ``r_table_line_condition`` (matched text is
    ignored); ``content`` holds the ``|`` separators and ``TableCell`` items.
    """

    grammar = (ignore(re.compile(r_table_line_condition, re.M)),
               attr('content', some(['|', TableCell])))
blank, attr('operand3', ternary_expression_or_less)) BracketedExpression.grammar = '[', attr('content', ternary_expression_or_less), ']' ParensExpression.grammar = '(', attr('content', ternary_expression_or_less), ')' InvocationExpression.grammar = (attr('reference', token), '(', attr('arguments', pypeg2.csl(ternary_expression_or_less)), ')') AttributeExpression.grammar = (attr( 'reference', [InvocationExpression, token, ParensExpression ]), attr('attributes', pypeg2.some([BracketedExpression, ('.', token)]))) AssignmentExpression.grammar = ( attr('operand1', [AttributeExpression, token]), blank, attr('operator', re.compile('[*/+-]?=')), blank, attr('operand2', [AssignmentExpression, *ternary_expression_or_less])) VariableDeclaration.grammar = ( attr( 'qualifiers', maybe_some( re.compile('const|highp|mediump|lowp|attribute|uniform|varying'))), attr('type', [AttributeExpression, token]), blank, attr('content', pypeg2.csl([AssignmentExpression, token])), )
class IncludeStatement(UnaryRule):
    """``{include file=<expression>}`` tag."""

    grammar = '{', _, Keyword('include'), _, Literal('file='), Expression, _, '}'


class SimpleTag(LeafRule):
    """``{init_time}`` or ``{process_time}`` tag."""

    grammar = '{', _, re.compile('|'.join(['init_time', 'process_time'])), _, '}'


""" Finally, the actual language description. """

# Assigned after all statement rules exist, since it lists every one of them.
SmartyLanguage.grammar = some([LiteralStatement, TranslationStatement,
                               IfStatement, ForStatement, IncludeStatement,
                               AssignStatement, FunctionStatement,
                               CommentStatement, SimpleTag, PrintStatement,
                               Content, LeftDelimTag, RightDelimTag])


class SmartyLanguageMain(Rule):
    """Same alternatives as ``SmartyLanguage.grammar`` plus ``LeftDelim``."""

    grammar = some([LiteralStatement, TranslationStatement,
                    IfStatement, ForStatement, IncludeStatement,
                    AssignStatement, FunctionStatement,
                    CommentStatement, SimpleTag, PrintStatement,
                    Content, LeftDelimTag, RightDelimTag, LeftDelim])


class SmartyLanguageMainOrEmpty(UnaryRule):
    """Either a ``SmartyLanguageMain`` or an ``EmptyOperator``."""

    grammar = [SmartyLanguageMain, EmptyOperator]