Ejemplo n.º 1
0
    def test_term_matching(self):
        """Check the trees produced for a bare term and for quoted terms."""
        grammar = self.grammar

        # Case 1: a bare term parses to one RegexNode covering the input.
        text = "grumble"
        parsed = grammar['term'].parse(text)
        expected = RegexNode('term', text, 0, len(text))
        self.assertEqual(parsed, expected, parsed)
        # Parse a second time and check the text captured by the regex match.
        reparsed = grammar['term'].parse(text)
        self.assertEqual(reparsed.match.group(), text)

        # Case 2: one quoted term; the part between the quotes should come
        #   back the same way the bare term did.
        text = "'grumble'"
        unquoted = text[1:-1]
        parsed = grammar['quoted_term'].parse(text)
        opening = Node('quote', text, 0, 1, children=[Node('', text, 0, 1)])
        closing = Node('quote', text, 8, 9, children=[Node('', text, 8, 9)])
        # ZeroOrMore '*' node wrapping the single term.
        repeated = Node('', text, 1, 8, children=[
            RegexNode('term', text, 1, 8),
        ])
        # Grouping '()' node.
        grouped = Node('', text, 1, 8, children=[repeated])
        expected = Node('quoted_term', text, 0, len(text),
                        children=[opening, grouped, closing])
        self.assertEqual(parsed, expected, parsed)
        self.assertEqual(parsed.children[1].text, unquoted)

        # Case 3: two words inside one pair of quotes are reported as a
        #   single term value.
        text = "'grumble wildly'"
        unquoted = text[1:-1]
        parsed = grammar['quoted_term'].parse(text)
        opening = Node('quote', text, 0, 1, children=[Node('', text, 0, 1)])
        closing = Node('quote', text, 15, 16, children=[Node('', text, 15, 16)])
        # ZeroOrMore '*' nodes, one per matched piece.
        first_word = Node('', text, 1, 8, children=[
            RegexNode('term', text, 1, 8),
        ])
        gap = Node('', text, 8, 9, children=[
            RegexNode('space', text, 8, 9),
        ])
        second_word = Node('', text, 9, 15, children=[
            RegexNode('term', text, 9, 15),
        ])
        # Grouping '()' node.
        grouped = Node('', text, 1, 15,
                       children=[first_word, gap, second_word])
        expected = Node('quoted_term', text, 0, len(text),
                        children=[opening, grouped, closing])
        self.assertEqual(parsed, expected, parsed)
        self.assertEqual(parsed.children[1].text, unquoted)
Ejemplo n.º 2
0
 def _uncached_match(self, text, pos, cache):
     """Try to match ``self.re`` against ``text`` at ``pos``.

     Returns a ``RegexNode`` built from ``self.name``, ``text``, ``pos`` and
     ``pos`` plus the length of the match, with the match object stored on
     the node as ``match``. Returns ``None`` if there is no match.

     ``cache`` is accepted but not used by this method.
     """
     m = self.re.match(text, pos)
     if m is None:
         return None
     start, end = m.span()
     # End offset is computed from the match length relative to ``pos``.
     node = RegexNode(self.name, text, pos, pos + (end - start))
     node.match = m  # TODO: A terrible idea for cache size?
     return node
Ejemplo n.º 3
0
 def _uncached_match(self, text, pos, cache, error):
     """Try to match ``self.re`` against ``text`` at ``pos``.

     Returns a ``RegexNode`` built from this expression (``self``), ``text``,
     ``pos`` and ``pos`` plus the length of the match, with the match object
     stored on the node as ``match``. Returns ``None`` if there is no match.

     ``cache`` and ``error`` are accepted but not used by this method.
     """
     m = self.re.match(text, pos)
     if m is None:
         return None
     start, end = m.span()
     # End offset is computed from the match length relative to ``pos``.
     node = RegexNode(self, text, pos, pos + (end - start))
     node.match = m  # TODO: A terrible idea for cache size?
     return node
Ejemplo n.º 4
0
    def test_punctuations(self):
        """Check that punctuation inside a quoted term stays part of its terms."""
        grammar = self.grammar

        text = '"hello, name!"'
        parsed = grammar['quoted_term'].parse(text)

        # The surrounding double quotes.
        opening = Node('quote', text, 0, 1)
        closing = Node('quote', text, 13, 14)

        # hello,
        first_word = Node('', text, 1, 7, children=[
            RegexNode('term', text, 1, 7),
        ])
        # (space)
        gap = Node('', text, 7, 8, children=[
            RegexNode('space', text, 7, 8),
        ])
        # name!
        second_word = Node('', text, 8, 13, children=[
            RegexNode('term', text, 8, 13),
        ])

        inner = Node('', text, 1, 13,
                     children=[first_word, gap, second_word])
        expected = Node('quoted_term', text, 0, len(text),
                        children=[opening, inner, closing])
        self.assertEqual(parsed, expected, parsed)
Ejemplo n.º 5
0
    def test_utf8_term(self):
        """Check that a non-ASCII term parses to one node spanning the input."""
        grammar = self.grammar

        text = u'你好'
        parsed = grammar['term'].parse(text)
        expected = RegexNode('term', text, 0, len(text))
        self.assertEqual(parsed, expected)
        # The underlying regex match must have consumed the whole input.
        matched = parsed.match.group()
        self.assertEqual(matched, text)
Ejemplo n.º 6
0
def test_use_regex_library():
    """Check parsing Cyrillic text with ``use_regex_library=True``.

    The grammar and the expected ``Regex`` expression are both built with
    ``use_regex_library=True``, and the parse result is compared against a
    ``RegexNode`` covering offsets 0 to 4 of the input.
    """
    rules = r'''
    unicode_word = ~"[\p{L}]*"
    '''
    grammar = Grammar(rules, use_regex_library=True)
    text = 'Тест'
    letters = Regex(pattern=r'[\p{L}]*', use_regex_library=True)
    expected = RegexNode(expr=letters, full_text=text, start=0, end=4)
    result = grammar.parse(text=text)
    eq_(result, expected)
Ejemplo n.º 7
0
    def test_field_matching(self):
        """Check the ``field`` rule with a bare value and with a quoted value."""
        grammar = self.grammar
        field_name = 'toggle'

        # Case 1: name:value with a bare term as the value.
        value = 'knob'
        text = "{}:{}".format(field_name, value)
        parsed = grammar['field'].parse(text)
        name_node = RegexNode('field_name', text, 0, 6)
        colon = Node('', text, 6, 7)  # The ':'.
        value_node = Node('', text, 7, 11, children=[
            RegexNode('term', text, 7, 11),
        ])
        expected = Node('field', text, 0, 11,
                        children=[name_node, colon, value_node])
        self.assertEqual(parsed, expected, parsed)
        self.assertEqual(parsed.children[0].text, field_name)
        self.assertEqual(parsed.children[2].text, value)

        # Case 2: name:'two words' with a quoted term as the value.
        value = 'air knob'
        text = "{}:'{}'".format(field_name, value)
        parsed = grammar['field'].parse(text)
        name_node = RegexNode('field_name', text, 0, 6)
        colon = Node('', text, 6, 7)  # The ':'.
        opening = Node('quote', text, 7, 8, children=[
            Node('', text, 7, 8),
        ])
        closing = Node('quote', text, 16, 17, children=[
            Node('', text, 16, 17),
        ])
        first_word = Node('', text, 8, 11, children=[
            RegexNode('term', text, 8, 11),
        ])
        gap = Node('', text, 11, 12, children=[
            RegexNode('space', text, 11, 12),
        ])
        second_word = Node('', text, 12, 16, children=[
            RegexNode('term', text, 12, 16),
        ])
        inner = Node('', text, 8, 16,
                     children=[first_word, gap, second_word])
        quoted = Node('quoted_term', text, 7, 17,
                      children=[opening, inner, closing])
        value_node = Node('', text, 7, 17, children=[quoted])
        expected = Node('field', text, 0, 17,
                        children=[name_node, colon, value_node])
        self.assertEqual(parsed, expected, parsed)
        # Walk value -> quoted_term -> text between the quotes.
        quoted_result = parsed.children[2].children[0]
        self.assertEqual(quoted_result.children[1].text, value)
Ejemplo n.º 8
0
    def test_query_matching(self):
        """Check the ``query`` rule on terms, a quoted term and a field."""
        grammar = self.grammar

        # Case 1: two bare terms followed by a field expression.
        field_value = 'book'
        pieces = ['organic', ' ', 'chemistry', ' ',
                  'type:{}'.format(field_value)]
        text = ''.join(pieces)
        parsed = grammar['query'].parse(text)

        first_term = Node('', text, 0, 7, children=[
            Node('expression', text, 0, 7, children=[
                Node('', text, 0, 7, children=[
                    RegexNode('term', text, 0, 7),
                ]),
            ]),
        ])
        first_gap = Node('', text, 7, 8, children=[
            RegexNode('space', text, 7, 8),
        ])
        second_term = Node('', text, 8, 17, children=[
            Node('expression', text, 8, 17, children=[
                Node('', text, 8, 17, children=[
                    RegexNode('term', text, 8, 17),
                ]),
            ]),
        ])
        second_gap = Node('', text, 17, 18, children=[
            RegexNode('space', text, 17, 18),
        ])
        field = Node('field', text, 18, 27, children=[
            RegexNode('field_name', text, 18, 22),
            Node('', text, 22, 23),
            Node('', text, 23, 27, children=[
                RegexNode('term', text, 23, 27),
            ]),
        ])
        field_expr = Node('', text, 18, 27, children=[
            Node('expression', text, 18, 27, children=[field]),
        ])
        expected = Node('query', text, 0, 27, children=[
            first_term, first_gap, second_term, second_gap, field_expr,
        ])

        self.assertEqual(parsed, expected, parsed)
        self.assertEqual(parsed.children[0].children[0].text,
                         pieces[0])  # 'organic'
        self.assertEqual(parsed.children[2].children[0].text,
                         pieces[2])  # 'chemistry'
        field_result = parsed.children[4].children[0].children[0]
        self.assertEqual(field_result.children[2].text, field_value)

        # Case 2: a quoted two-word term followed by a field expression.
        field_value = 'book'
        pieces = ['"organic chemistry"', ' ',
                  'type:{}'.format(field_value)]
        text = ''.join(pieces)
        parsed = grammar['query'].parse(text)

        opening = Node('quote', text, 0, 1, children=[
            Node('', text, 0, 1),
        ])
        closing = Node('quote', text, 18, 19, children=[
            Node('', text, 18, 19),
        ])
        inner = Node('', text, 1, 18, children=[
            Node('', text, 1, 8, children=[
                RegexNode('term', text, 1, 8),
            ]),
            Node('', text, 8, 9, children=[
                RegexNode('space', text, 8, 9),
            ]),
            Node('', text, 9, 18, children=[
                RegexNode('term', text, 9, 18),
            ]),
        ])
        quoted = Node('quoted_term', text, 0, 19,
                      children=[opening, inner, closing])
        quoted_expr = Node('', text, 0, 19, children=[
            Node('expression', text, 0, 19, children=[
                Node('', text, 0, 19, children=[quoted]),
            ]),
        ])
        gap = Node('', text, 19, 20, children=[
            RegexNode('space', text, 19, 20),
        ])
        field = Node('field', text, 20, 29, children=[
            RegexNode('field_name', text, 20, 24),
            Node('', text, 24, 25),
            Node('', text, 25, 29, children=[
                RegexNode('term', text, 25, 29),
            ]),
        ])
        field_expr = Node('', text, 20, 29, children=[
            Node('expression', text, 20, 29, children=[field]),
        ])
        expected = Node('query', text, 0, 29,
                        children=[quoted_expr, gap, field_expr])

        self.assertEqual(parsed, expected, parsed)
        # Match 'organic chemistry'
        quoted_result = parsed.children[0].children[0].children[0].children[0]
        self.assertEqual(quoted_result.children[1].text, pieces[0][1:-1])
        # Match 'book'
        field_result = parsed.children[2].children[0].children[0]
        self.assertEqual(field_result.children[2].text, field_value)