Example #1
0
 def test_delete_item(self):
     """Check deletion of children by index and by slice, including the
     errors expected for an unknown tag and for out-of-range indices."""
     source = '(root (A "0") (B "1") (C "2") (D "3"))'
     node = parse_sxpr(source)

     # a tag that is not among the children must not be found
     try:
         _ = node.index('E')
     except ValueError:
         pass
     else:
         assert False, 'ValueError expected'
     assert node[-1].tag_name == "D"

     # with four children, neither 4 nor -5 is a valid index
     for bad_index in (4, -5):
         try:
             del node[bad_index]
         except IndexError:
             pass
         else:
             assert False, 'IndexError expected'

     # delete single children, counted from the end and from the start
     del node[-2]
     assert 'C' not in node
     del node[0]
     assert 'A' not in node

     # delete a slice from a freshly parsed tree
     node = parse_sxpr(source)
     del node[1:3]
     assert str(node) == "03"
Example #2
0
 def test_milestone_segment(self):
     """Check the segments milestone_segment() returns for several pairs
     of start and end nodes picked from one tree."""
     tree = parse_sxpr(
         '(root (left (A "a") (B "b") (C "c")) (middle "-") (right (X "x") (Y "y") (Z "z")))'
     ).with_pos(0)
     left, right, middle = (tree.pick(tag) for tag in ('left', 'right', 'middle'))
     B, Y = tree.pick('B'), tree.pick('Y')

     # from B to Y: 'left' and 'right' of the segment differ from the
     # originals, while the nodes inside the range compare equal
     inner = tree.milestone_segment(B, Y)
     assert inner.content == "bc-xy"
     assert left != inner.pick('left')
     assert right != inner.pick('right')
     assert B == inner.pick('B')
     assert Y == inner.pick('Y')
     assert middle == inner.pick('middle')

     # from A to Z: the segment compares equal to the whole tree
     A, Z = tree.pick('A'), tree.pick('Z')
     whole = tree.milestone_segment(A, Z)
     assert whole == tree
     assert whole.content == "abc-xyz"

     # from A to middle: everything after 'middle' is missing
     front = tree.milestone_segment(A, middle)
     assert front.equals(
         parse_sxpr('(root (left (A "a") (B "b") (C "c")) (middle "-"))'))
     assert front.content == "abc-"
     assert front != tree
     assert A == front.pick('A')
     assert middle == front.pick('middle')

     # start and end are the same node
     root = tree.milestone_segment(tree, tree)
     assert root == tree
     assert tree.milestone_segment(B, B) == B

     # start and end are siblings below 'left'
     C = tree.pick('C')
     siblings = tree.milestone_segment(B, C)
     assert siblings.equals(parse_sxpr('(left (B "b") (C "c"))'))
Example #3
0
    def test_insert_nodes(self):
        """Check insert() with non-negative, negative and computed positions."""
        tree = parse_sxpr('(A (B 1) (B 2) (X 3))').with_pos(0)

        # position 0: the new node ends up in front of all children
        traverse(tree, {'A': insert(0, node_maker('c', '=>'))})
        after_front = tree.serialize()
        assert after_front == '(A (c "=>") (B "1") (B "2") (X "3"))', after_front

        # position 4 equals the number of children at this point
        traverse(tree, {'A': insert(4, node_maker('d', '<='))})
        after_back = tree.serialize()
        assert after_back == '(A (c "=>") (B "1") (B "2") (X "3") (d "<="))', after_back

        # a negative position
        traverse(tree, {'A': insert(-2, node_maker('e', '|'))})
        after_negative = tree.serialize()
        assert after_negative == '(A (c "=>") (B "1") (B "2") (e "|") (X "3") (d "<="))', after_negative

        # inserting into a node with empty content
        leaf = parse_sxpr('(A "")').with_pos(0)
        traverse(leaf, {'A': insert(0, node_maker('B', 'b'))})
        assert leaf.serialize() == '(A (B "b"))'

        # a position function that returns None: nothing is expected to be inserted
        leaf = parse_sxpr('(A "")').with_pos(0)
        traverse(leaf, {'A': insert(lambda ctx: None, node_maker('B', 'b'))})
        assert leaf.serialize() == '(A)'
Example #4
0
 def test_sexpr_attributes(self):
     """Check that attributes set via `attr` serialize like attributes
     that were written in the S-expression source."""
     tree = parse_sxpr('(A "B")')
     # (attribute name, attribute value, equivalent S-expression source)
     steps = (
         ('attr', "value", '(A `(attr "value") "B")'),
         ('attr2', "value2", '(A `(attr "value") `(attr2 "value2") "B")'),
     )
     for name, value, equivalent_source in steps:
         tree.attr[name] = value
         reference = parse_sxpr(equivalent_source)
         assert tree.as_sxpr() == reference.as_sxpr()
Example #5
0
 def test_sexpr(self):
     """Check the layout produced by as_sxpr(compact=False, flatten_threshold=0)."""
     tree = parse_sxpr('(A (B "C") (D "E"))')
     rendered = tree.as_sxpr(compact=False, flatten_threshold=0)
     expected = '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)'
     assert rendered == expected, rendered

     # one more level of nesting
     tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
     rendered = tree.as_sxpr(compact=False, flatten_threshold=0)
     expected = ('(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )'
                 '\n  )\n  (G\n    "H"\n  )\n)')
     assert rendered == expected

     # line breaks inside leaf content: each line is quoted separately
     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
     rendered = tree.as_sxpr(compact=False, flatten_threshold=0)
     expected = ('(A\n  (B\n    (C\n      "D"\n      "X"\n    )'
                 '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)')
     assert rendered == expected, rendered
Example #6
0
 def test_parse_s_expression_w_attributes(self):
     """Check that attributes written as `(name "value") are parsed."""
     short = '(A `(attr "1") (B "X"))'
     serialized = parse_sxpr(short).as_sxpr()
     assert flatten_sxpr(serialized) == '(A `(attr "1") (B "X"))'

     # an attribute on the root of a deeply nested, multi-line expression
     nested = """(BedeutungsPosition `(unterbedeutungstiefe "0")
              (Bedeutung
                (Beleg
                  (Quellenangabe (Quelle (Autor "LIUTPR.") (L " ") (Werk "leg.")) (L " ")
                    (BelegStelle (Stellenangabe (Stelle "21")) (L " ")
                      (BelegText (TEXT "...")))))))"""
     tree = parse_sxpr(nested)
     assert str(tree) == "LIUTPR. leg. 21 ..."
     assert tree.attr['unterbedeutungstiefe'] == '0'
Example #7
0
 def test_position_assignment(self):
     """Check how with_pos() and assignments to `result` set position values."""
     tree = parse_sxpr('(A (B (C "D") (E "FF")) (G "HHH"))')

     # assignment of position values
     tree.with_pos(0)
     b_node, g_node = tree['B'], tree['G']
     positions = (tree.pos, b_node.pos, b_node['C'].pos,
                  b_node['E'].pos, g_node.pos)
     assert positions == (0, 0, 0, 1, 3)

     # assignment of unassigned position values
     g_node.result = parse_sxpr('(_ (N "OOOO") (P "Q"))').children
     assert (g_node['N'].pos, g_node['P'].pos) == (3, 7)

     # no reassignment of position values
     # (because pos-values should always reflect source position)
     positioned = parse_sxpr('(_ (N "OOOO") (P "Q"))').with_pos(1)
     g_node.result = positioned.children
     assert (g_node['N'].pos, g_node['P'].pos) == (1, 5)
Example #8
0
    def test_deepcopy(self):
        """Check that a deep copy of a RootNode is independent of the original."""
        source = '(a (b c) (d (e f) (h i)))'
        pristine_sxpr = parse_sxpr(source).as_sxpr()

        tree = RootNode(parse_sxpr(source))
        tree.with_pos(0)
        duplicate = copy.deepcopy(tree)

        assert tree.equals(duplicate)
        assert tree.as_sxpr() == pristine_sxpr
        assert duplicate.as_sxpr() == pristine_sxpr

        # an error added to the original must not show up in the copy
        tree.add_error(tree, Error('Test Error', 0))
        assert not duplicate.errors
        assert tree.as_sxpr() != pristine_sxpr
        assert duplicate.as_sxpr() == pristine_sxpr

        # changing content of the original must leave the copy as it was
        tree['d'].result = "x"
        assert not tree.equals(duplicate)
        assert duplicate.equals(parse_sxpr(source))
        assert tree.equals(parse_sxpr('(a (b c) (d x))'))

        # this also checks for errors equality...
        assert parse_sxpr('(a (b c) (d x))').as_sxpr() != tree.as_sxpr()
Example #9
0
    def test_find(self):
        """Check select_if() with a tag-name predicate, with and without the root."""
        def tagged(tag_name):
            # build a predicate that accepts nodes carrying `tag_name`
            def predicate(node):
                return node.tag_name == tag_name
            return predicate

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        found = list(tree.select_if(tagged("X")))
        assert len(found) == 2, len(found)
        first, second = found
        assert str(first) == 'd', str(first)
        assert str(second) == 'F', str(second)
        assert first.equals(parse_sxpr('(X (c d))'))
        assert second.equals(parse_sxpr('(X F)'))

        # the root node is only among the results if include_root is True
        is_root_tag = tagged('a')
        assert list(tree.select_if(is_root_tag, include_root=True))
        assert not list(tree.select_if(is_root_tag, include_root=False))
Example #10
0
 def test_as_etree(self):
     """Check conversion between nodes and ElementTree elements in both
     directions, including mixed content and empty tags."""
     import xml.etree.ElementTree as ET
     # import lxml.etree as ET

     # element content with one attribute
     source = '(R (A "1") (S (B `(class "bold") "2")) (C "3"))'
     expected_xml = '<R><A>1</A><S><B class="bold">2</B></S><C>3</C></R>'
     node = parse_sxpr(source)
     element = node.as_etree()
     assert ET.tostring(element, encoding="unicode") == expected_xml, ET.tostring(
         element, encoding="unicode")
     node = Node.from_etree(element)
     assert node.as_sxpr() == source

     # mixed content: text between elements is expected as :Text nodes,
     # and the XML comment does not occur in the expected result
     element = ET.XML(
         '<R>mixed <A>1</A>mode <!-- comment --><B class="italic" /></R>')
     node = Node.from_etree(element)
     mixed_sxpr = '(R (:Text "mixed ") (A "1") (:Text "mode ") (B `(class "italic")))'
     assert node.as_sxpr() == mixed_sxpr

     # the same result is expected after a detour through serialized XML
     serialized = ET.tostring(node.as_etree(), encoding="unicode")
     node = Node.from_etree(ET.XML(serialized))
     assert node.as_sxpr() == mixed_sxpr

     # the set passed to parse_xml() as out_empty_tags is handed on to as_etree()
     empty_tags = set()
     tree = parse_xml('<a><b>1<c>2<d />3</c></b>4</a>',
                      out_empty_tags=empty_tags)
     element_tree = tree.as_etree(empty_tags=empty_tags)
     # ignore whether the serializer puts a blank before "/>"
     normalized = ET.tostring(element_tree).replace(b' /', b'/')
     assert normalized == b'<a><b>1<c>2<d/>3</c></b>4</a>'
     tree = Node.from_etree(element_tree)
     assert flatten_sxpr(tree.as_sxpr()) == \
            '(a (b (:Text "1") (c (:Text "2") (d) (:Text "3"))) (:Text "4"))'
Example #11
0
 def test_insert_remove(self):
     """Check remove() and insert() on the children of a node."""
     root = parse_sxpr('(R (A "1") (B "2") (C "3"))')
     moved = root.pick('B')

     root.remove(moved)
     assert root.as_sxpr() == '(R (A "1") (C "3"))'

     # put the removed child back, this time at the front
     root.insert(0, moved)
     assert root.as_sxpr() == '(R (B "2") (A "1") (C "3"))'
Example #12
0
    def test_attr_serialization_and_parsing(self):
        """Check how the position and the attributes of a node come through
        serialization as JSON, XML and S-expression and parsing back."""
        n = Node('employee', 'James Bond').with_pos(46)
        n.attr['branch'] = 'Secret Service'
        n.attr['id'] = '007'

        # JSON: only verifies that serializing and re-parsing run without error
        json_str = n.as_json()
        parse_json_syntaxtree(json_str)

        # XML: "_pos" only appears in the output when as_xml() is called
        # with an argument (here: the empty string)
        xml = n.as_xml()
        assert xml.find('_pos') < 0
        xml = n.as_xml('')
        assert xml.find('_pos') >= 0
        tree = parse_xml(xml)
        assert tree.pos == 46
        assert '_pos' not in tree.attr
        # with ignore_pos=True, "_pos" remains an attribute and no position is set
        tree = parse_xml(xml, ignore_pos=True)
        assert '_pos' in tree.attr
        assert tree._pos < 0

        # S-Expression
        sxpr = n.as_sxpr()
        assert sxpr.find('pos') < 0
        sxpr = n.as_sxpr('')
        assert sxpr.find('pos') >= 0
        tree = parse_sxpr(sxpr)
        assert tree.pos == 46
        assert 'pos' not in tree.attr
Example #13
0
 def test_sxpr_roundtrip(self):
     """Check that parsing and re-serializing reproduces the S-expression."""
     pieces = [
         '(BelegText (Anker "interdico_1") (BelegLemma "inter.|ticente") (TEXT ", (") ',
         '(Anker "interdico_2") (BelegLemma "inter.|titente") (L " ") (Zusatz "var. l.") ',
         '(TEXT ") Deo."))',
     ]
     original = ''.join(pieces)
     reserialized = parse_sxpr(original).as_sxpr()
     assert flatten_sxpr(reserialized) == original
Example #14
0
 def test_mock_syntax_tree_with_classes(self):
     """Check the tag names obtained from names of the form `name:class`."""
     tree = parse_sxpr('(a:class1 (b:class2 x) (:class3 y) (c z))')
     assert tree.tag_name == 'a'
     # expected tag names of the three children, in order
     for position, expected_tag in enumerate(('b', ':class3', 'c')):
         assert tree.result[position].tag_name == expected_tag
Example #15
0
 def test_parse_s_expression_malformed(self):
     """Check that parse_sxpr() raises a ValueError for an S-expression
     with unbalanced brackets."""
     try:
         # two closing brackets are missing at the end of this expression
         parse_sxpr(
             '(A (B 1) (C (D (E 2) (F 3)) (G 4) (H (I 5) (J 6)) (K 7)')
     except ValueError:
         pass
     else:
         assert False, "ValueError expected!"
Example #16
0
 def test_roundtrip(self):
     """Check that flattened XML is reproduced after parsing it back."""
     source_tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
     flat = flatten_xml(source_tree.as_xml())
     assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'
     reparsed = parse_xml(flat)
     assert flat == flatten_xml(reparsed.as_xml())
Example #17
0
class TestOptimizations:
    """Tests for transformations that merge the :RegExp leaves of a node."""

    # shared fixture; each test works on its own deep copy
    model = RootNode(
        parse_sxpr('''(array
          (number "1")
          (number
            (:RegExp "2")
            (:RegExp ".")
            (:RegExp "0"))
          (string "a string"))''')).with_pos(0)

    def raise_error(self, context):
        """Transformation that fails the test as soon as it is called."""
        raise AssertionError()

    def test_squeeze_tree(self):
        """Check the tree that merge_treetops() leaves behind."""
        tree = copy.deepcopy(TestOptimizations.model)
        merge_treetops(tree)
        expected = '''(array (number "1") (number "2.0") (string "a string"))'''
        assert tree.as_sxpr() == expected

    def test_blocking(self):
        """Check that the traversal never calls the ':RegExp' transformation
        when '<' is mapped to BLOCK_ANONYMOUS_LEAVES."""
        tree = copy.deepcopy(TestOptimizations.model)
        # raise_error would abort the test if a :RegExp node were visited
        traverse(tree, {
            '<': BLOCK_ANONYMOUS_LEAVES,
            'number': [merge_leaves, reduce_single_child],
            ':RegExp': self.raise_error,
        })
        expected = parse_sxpr(
            '(array (number "1") (number "2.0") (string "a string"))')
        assert tree.equals(expected)
Example #18
0
 def test_copy_errors(self):
     """Check that a deep copy of a RootNode serializes like the original
     after errors have been added."""
     root = RootNode(parse_sxpr('(A (B "1") (C "2"))').with_pos(0))
     # one error attached to a node, one without a node
     root.add_error(root['C'], Error('error', 1))
     root.add_error(None, Error('unspecific error', 2))
     before = root.as_sxpr()
     after = copy.deepcopy(root).as_sxpr()
     assert after == before  # is the error message still included?
Example #19
0
    def test_positions_of(self):
        """Check positions_of(), called directly and as position argument of insert()."""
        tree = parse_sxpr('(A (B 1) (C 1) (B 2))').with_pos(0)
        # (tag name, expected positions among the children of A)
        expectations = (('A', ()), ('X', ()), ('C', (1, )), ('B', (0, 2)))
        for tag, indices in expectations:
            assert positions_of([tree], tag) == indices

        tree = parse_sxpr('(A (B 1) (C 2) (D 3))').with_pos(0)
        # 'D' is a child of A: X is expected right in front of it
        traverse(tree, {'A': insert(positions_of('D'), node_maker('X', '0'))})
        serialized = tree.serialize()
        assert serialized == '(A (B "1") (C "2") (X "0") (D "3"))', serialized

        # 'Z' does not occur: the tree is expected to stay as it is
        traverse(tree, {'A': insert(positions_of('Z'), node_maker('X', '0'))})
        serialized = tree.serialize()
        assert serialized == '(A (B "1") (C "2") (X "0") (D "3"))', serialized
Example #20
0
    def test_mock_syntax_tree(self):
        """Check parsing of S-expressions with unquoted and variously quoted content."""
        plain = '(a (b c) (d e) (f (g h)))'
        serialized = parse_sxpr(plain).as_sxpr()
        assert flatten_sxpr(serialized.replace('"', '')) == plain

        # test different quotation marks
        mixed_quotes = '''(a (b """c""" 'k' "l") (d e) (f (g h)))'''
        unquoted = '(a (b c k l) (d e) (f (g h)))'
        serialized = parse_sxpr(mixed_quotes).as_sxpr()
        assert flatten_sxpr(serialized.replace('"', '')) == unquoted

        # quoted strings inside one node stay separate
        quoted = '(a (b "c" "k" "l") (d "e") (f (g "h")))'
        serialized = parse_sxpr(quoted).as_sxpr()
        assert flatten_sxpr(serialized) == quoted

        # unquoted words inside one node come back as a single string
        serialized = parse_sxpr(unquoted).as_sxpr()
        assert flatten_sxpr(serialized) == '(a (b "c k l") (d "e") (f (g "h")))'
Example #21
0
    def test_merge_adjacent(self):
        """Check that merge_adjacent() merges neighbouring TEXT and L children."""
        sentence = parse_sxpr('(SENTENCE (TEXT "Guten") (L " ") (TEXT "Tag") '
                              ' (T "\n") (TEXT "Hallo") (L " ") (TEXT "Welt")'
                              ' (T "\n") (L " "))')
        mergeable = is_one_of('TEXT', 'L')
        transformations = {'SENTENCE': merge_adjacent(mergeable, 'TEXT')}
        traverse(sentence, transformations)
        assert tree_sanity_check(sentence)

        first_text = sentence.pick_child('TEXT')
        assert first_text.result == "Guten Tag"
        assert sentence[2].result == "Hallo Welt"
        # the single L at the end keeps its tag name and the T nodes remain
        assert sentence[-1].tag_name == 'L'
        assert 'T' in sentence

        # leaf nodes should be left untouched
        leaf = parse_sxpr('(SENTENCE "Hallo Welt")')
        traverse(leaf, transformations)
        assert leaf.content == "Hallo Welt", leaf.content
Example #22
0
 def test_move_adjacent3(self):
     """Smoke test: apply move_adjacent() to the TEXT node of a small tree.
     The resulting tree is not inspected."""
     def is_whitespace(ctx):
         # the predicate looks at the last entry of the context only
         return ctx[-1].tag_name == WHITESPACE_PTYPE

     sentence = parse_sxpr(
         '(SENTENCE  (:Whitespace " ") (:Whitespace " ")  '
         '(TEXT (PHRASE "Guten Tag") (:Whitespace " ")))')
     traverse(sentence, {'TEXT': move_adjacent(is_whitespace)})
Example #23
0
 def test_parse_s_expression(self):
     """Check parse_sxpr() on valid input and on two kinds of invalid input."""
     flat = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
     assert flat == '(a (b "c"))', flat

     # content spread over several lines
     flat = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
     assert flat == '(a "i" "j" "k")', flat

     # input that is not enclosed in brackets
     try:
         parse_sxpr('a b c')
     except ValueError:
         pass
     else:
         assert False, "parse_sxpr() should raise a ValueError " \
                       "if argument is not a tree!"

     # one closing bracket too many
     try:
         parse_sxpr('(a (b c)))')
     except ValueError:
         pass
     else:
         assert False, "parse_sxpr() should raise a ValueError for too many matching brackets."
Example #24
0
 def test_xml_tag_omission(self):
     """Check which tags as_xml() leaves out when all tag names are passed
     as inline_tags and string_tags."""
     tree = parse_sxpr('(XML (T "Hallo") (L " ") (T "Welt!"))')
     every_tag = {'XML', 'T', 'L'}
     rendered = tree.as_xml(inline_tags=every_tag, string_tags=every_tag)
     assert rendered == "Hallo Welt!"

     # tags with attributes will never be omitted
     tree.pick_child('T').attr['class'] = "kursiv"
     rendered = tree.as_xml(inline_tags=every_tag, string_tags=every_tag)
     assert rendered == '<T class="kursiv">Hallo</T> Welt!'
Example #25
0
 def test_select_children(self):
     """Check select_children() and indices() on the direct children of a node."""
     tree = parse_sxpr('(A (B 1) (C (X 1) (Y 1)) (B 2))')

     # only the direct children are expected, not X and Y
     child_tags = [child.tag_name for child in tree.select_children(ALL_NODES)]
     assert child_tags == ['B', 'C', 'B']

     # reverse=True yields the last B first
     reversed_b = [child.content
                   for child in tree.select_children('B', reverse=True)]
     assert reversed_b == ['2', '1']

     assert tree.indices('B') == (0, 2)
Example #26
0
 def test_remove_content(self):
     """Check remove_content() before and after reduce_single_child()."""
     root = parse_sxpr(
         '(BelegLemma (:Series (:RegExp "#") (LAT_WORT (:RegExp "facitergula"))))'
     )
     # on the unreduced tree the call leaves the content as it is
     remove_content([root], '#')
     assert root.content == "#facitergula", str(root.content)

     # after reducing the single child, the "#" is expected to be gone
     reduce_single_child([root])
     remove_content([root], '#')
     assert root.content == "facitergula"
Example #27
0
 def test_index(self):
     """Check index() for existing child tags and for a missing one."""
     tree = parse_sxpr('(a (b 0) (c 1) (d 2))')
     for tag, position in (('d', 2), ('b', 0), ('c', 1)):
         assert tree.index(tag) == position

     # a tag that does not occur among the children
     try:
         tree.index('x')
     except ValueError:
         pass
     else:
         raise AssertionError('ValueError expected!')
Example #28
0
 def test_contains(self):
     """Check the `in` operator on a node against the results of select()."""
     tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

     # the root's own tag: not "in" the tree, and only selected when the
     # second argument of select() is True
     assert 'a' not in tree
     assert any(tree.select('a', True))
     assert not any(tree.select('a', False))

     # tags of the direct children
     for child_tag in ('b', 'X', 'e'):
         assert child_tag in tree

     # 'c' sits one level deeper: not "in" the tree, but found by select()
     assert 'c' not in tree
     assert any(tree.select('c', False))
Example #29
0
 def test_rstrip(self):
     """Check that no :Whitespace node is left after rstrip() in several
     constellations of trailing whitespace."""
     cst = parse_sxpr('(_Token (:Re test) (:Whitespace " "))')
     rstrip([cst])
     stripped = cst.as_sxpr()
     assert ":Whitespace" not in stripped, stripped

     # a second call must not change the result
     rstrip([cst])
     assert stripped == cst.as_sxpr()

     # a node without any content
     cst = parse_sxpr('(_Token)')
     rstrip([cst])
     assert cst.as_sxpr() == '(_Token)'

     # two whitespace nodes at the end
     cst = parse_sxpr(
         '(_Token  (:Re test) (:Whitespace " ") (:Whitespace " "))')
     rstrip([cst])
     assert ":Whitespace" not in cst.as_sxpr()

     # whitespace at the end, part of it nested one level deeper
     cst = parse_sxpr(
         '(_Token  (:Re test) (:Whitespace " ") (Deeper (:Whitespace " ")) '
         '(:Whitespace " "))')
     rstrip([cst])
     result = cst.as_sxpr()
     assert ":Whitespace" not in result, result
Example #30
0
 def test_content_property(self):
     """Check that `content` reflects later changes to the results of child nodes."""
     tree = RootNode(parse_sxpr('(a (b c) (d e))'))
     snapshot = tree.content

     # overwrite the results of both leaves
     tree.pick('b').result = "recently "
     tree.pick('d').result = "changed"

     # the string read earlier keeps the old value
     assert snapshot != tree.content
     assert snapshot == 'ce'
     assert tree.content == 'recently changed'