def test_punctuation_verylow(discont_tree, cont_tree):
    """Check transform.punctuation_verylow on both sample trees.

    In each tree the word of the first terminal is set to "(" and the word
    of the terminal at index 7 is set to ")" before the transformation is
    applied.  Afterwards the test compares the former parent of the last
    terminal with the returned tree, the children of the first terminal's
    parent before and after, the parents of the last two terminals, and
    the preorder label sequence.
    """
    def check(tree, expected_labels):
        leaves = trees.terminals(tree)
        # NOTE(review): `children` is referenced here, not copied; if the
        # attribute is a live list, the equality check below compares an
        # object with itself -- confirm against the tree implementation.
        children_before = leaves[0].parent.children
        last_parent_before = leaves[-1].parent
        leaves[0].data['word'] = "("
        leaves[7].data['word'] = ")"
        transformed = transform.punctuation_verylow(tree)
        children_after = leaves[0].parent.children
        last_parent_after = leaves[-1].parent
        assert last_parent_before == transformed
        assert children_before == children_after
        assert last_parent_after == leaves[-2].parent
        assert expected_labels == [node.data['label'] for node in
                                   trees.preorder(transformed)]

    # Discontinuous sample.
    check(discont_tree,
          [u'VROOT', u'S', u'VP', u'SBAR', u'VP', u'WP',
           u'VB', u'?', u'IN', u'NP', u'NNP', u'VB',
           u'NNP', u'VB', u'NNP'])
    # Continuous sample.
    check(cont_tree,
          [u'VROOT', u'S', u'WP', u'VB', u'NNP',
           u'VP', u'VB', u'NNP', u'SBAR', u'IN',
           u'NP', u'NNP', u'VP', u'VB', u'?'])
def test_delete_terminal(discont_tree, cont_tree):
    """Check trees.delete_terminal on both sample trees.

    After deleting the first terminal, the call must have returned that
    terminal's former parent, the remaining words must equal
    testdata.WORDS[1:], and the preorder node count must have dropped by
    exactly one.
    """
    def check(tree):
        count_before = len(list(trees.preorder(tree)))
        victim = trees.terminals(tree)[0]
        former_parent = victim.parent
        returned = trees.delete_terminal(tree, victim)
        remaining_words = [leaf.data['word']
                           for leaf in trees.terminals(tree)]
        count_after = len(list(trees.preorder(tree)))
        assert returned == former_parent
        assert remaining_words == testdata.WORDS[1:]
        assert count_after == count_before - 1

    check(discont_tree)  # discont
    check(cont_tree)  # cont
def test_binarize(discont_tree, cont_tree):
    """Check transform.binarize on both sample trees.

    Each tree is passed through transform.negra_mark_heads and then
    transform.binarize; the preorder labels of the result are compared
    with the reference lists in testdata.
    """
    def binarized_labels(tree):
        marked = transform.negra_mark_heads(tree)
        binary = transform.binarize(marked)
        return [node.data['label'] for node in trees.preorder(binary)]

    discont_labels = binarized_labels(discont_tree)
    assert discont_labels == testdata.DISCONT_LABELS_BIN_PREORDER
    cont_labels = binarized_labels(cont_tree)
    assert cont_labels == testdata.CONT_LABELS_BIN_PREORDER
def test_terminal_blocks(discont_tree, cont_tree):
    """Check trees.terminal_blocks on the first VP node of each tree.

    The blocks of the first node labelled 'VP' in preorder are turned
    into sets of terminal numbers and compared with the reference blocks
    in testdata.  Nothing is asserted for a tree without such a node.
    """
    def first_vp(tree):
        # Stops consuming the traversal at the first match.
        return next((node for node in trees.preorder(tree)
                     if node.data['label'] == 'VP'), None)

    def number_sets(node):
        return [set(leaf.data['num'] for leaf in block)
                for block in trees.terminal_blocks(node)]

    discont_vp = first_vp(discont_tree)
    if discont_vp is not None:
        found = number_sets(discont_vp)
        assert found == [set(block) for block in testdata.DISCONT_BLOCKS_VP]

    cont_vp = first_vp(cont_tree)
    if cont_vp is not None:
        found = number_sets(cont_vp)
        assert found == [set(block) for block in testdata.CONT_BLOCKS_VP]
def test_analysis(discont_tree, cont_tree):
    """Check the treeanalysis module on both sample trees.

    Exercises the GapDegree, PosTags and SentenceCount analyses as well
    as the gap_degree and gap_degree_node functions.
    """
    # Gap degree statistics accumulated over both trees.
    gaps = treeanalysis.GapDegree()
    for tree in (cont_tree, discont_tree):
        gaps.run(tree)
    assert sum(gaps.gaps_per_tree.values()) == 2
    assert sum(gaps.gaps_per_node.values()) == 12
    assert gaps.gaps_per_tree[0] == 1
    assert gaps.gaps_per_tree[1] == 1
    assert gaps.gaps_per_node[0] == 9
    assert gaps.gaps_per_node[1] == 3

    # Gap degree of whole trees.
    assert treeanalysis.gap_degree(discont_tree) == 1
    assert treeanalysis.gap_degree(cont_tree) == 0

    # Gap degree of single nodes; numbering is computed first so that
    # the nodes can be identified through data['num'].
    treeoutput.compute_export_numbering(discont_tree)
    gapped_numbers = [500, 502, 503]
    for node in trees.preorder(discont_tree):
        expected = 1 if node.data['num'] in gapped_numbers else 0
        assert treeanalysis.gap_degree_node(node) == expected

    # POS tag collection.
    tag_analysis = treeanalysis.PosTags()
    tag_analysis.run(discont_tree)
    assert tag_analysis.tags == testdata.POS

    # Sentence counter: one run per tree.
    counter = treeanalysis.SentenceCount()
    for tree in (discont_tree, cont_tree):
        counter.run(tree)
    assert counter.cnt == 2
def test_discont_general(discont_tree):
    """Run general checks on the discontinuous sample tree.

    Nodes, labels, terminals and words are collected first; the tree is
    then head-marked and binarized to check treeanalysis.disco_order for
    the 'left' and 'rightd' modes.  All assertions run at the end.
    """
    # Collected before the tree is transformed below.
    preorder_nodes = list(trees.preorder(discont_tree))
    preorder_labels = [node.data['label'] for node in preorder_nodes]
    ordered = trees.terminals(discont_tree)
    ordered_words = [leaf.data['word'] for leaf in ordered]
    unordered = trees.unordered_terminals(discont_tree)
    unordered_words = [leaf.data['word'] for leaf in unordered]

    binary = transform.negra_mark_heads(discont_tree)
    binary = transform.binarize(binary)
    left_numbers = [node.data['num'] for node
                    in treeanalysis.disco_order(binary, 'left')]
    rightd_numbers = [node.data['num'] for node
                      in treeanalysis.disco_order(binary, 'rightd')]

    assert left_numbers == testdata.DISCONT_LEFT_REORDER
    assert rightd_numbers == testdata.DISCONT_RIGHTD_REORDER
    assert all('num' in leaf.data for leaf in ordered)
    assert all(leaf in unordered for leaf in ordered)
    assert len(ordered) == 9
    assert len(unordered) == 9
    assert len(preorder_nodes) == 15
    assert preorder_labels == testdata.DISCONT_LABELS_PREORDER
    assert ordered_words == testdata.WORDS
    assert set(unordered_words) == set(testdata.WORDS)
def test_add_topnode(discont_tree, cont_tree):
    """Check transform.add_topnode on both sample trees.

    The returned tree must contain exactly one node more than the input,
    must be the parent of the input tree, and must have a single child.
    """
    def check(tree):
        size_before = len(list(trees.preorder(tree)))
        top = transform.add_topnode(tree)
        size_after = len(list(trees.preorder(top)))
        assert size_before == size_after - 1
        assert tree.parent == top
        assert len(trees.children(top)) == 1

    check(discont_tree)
    check(cont_tree)
def test_left_sibling(discont_tree, cont_tree):
    """Check trees.left_sibling on both sample trees.

    For every node in preorder, the label of its left sibling is
    collected (or None where trees.left_sibling returns None).  The
    resulting sequences are compared with the reference lists in
    testdata.
    """
    def left_sibling_labels(tree):
        # One entry per preorder node: sibling label or None.
        labels = []
        for node in trees.preorder(tree):
            sibling = trees.left_sibling(node)
            labels.append(None if sibling is None
                          else sibling.data['label'])
        return labels

    discont_labels = left_sibling_labels(discont_tree)
    cont_labels = left_sibling_labels(cont_tree)
    assert discont_labels == testdata.DISCONT_LEFTSIB_PREORDER
    assert cont_labels == testdata.CONT_LEFTSIB_PREORDER
def test_boyd(discont_tree):
    """Check transform.boyd_split on the discontinuous sample tree.

    The tree is passed through transform.root_attach and
    transform.negra_mark_heads before splitting.  Preorder labels and
    terminal words of the result are compared with testdata.
    """
    split = transform.root_attach(discont_tree)
    split = transform.negra_mark_heads(split)
    split = transform.boyd_split(split)
    labels = [node.data['label'] for node in trees.preorder(split)]
    ordered_words = [leaf.data['word'] for leaf in trees.terminals(split)]
    unordered_words = [leaf.data['word']
                       for leaf in trees.unordered_terminals(split)]
    assert labels == testdata.DISCONT_LABELSBOYD_PREORDER
    assert ordered_words == testdata.WORDS
    assert set(unordered_words) == set(testdata.WORDS)
# Example #10
# 0
def test_root_attach(discont_tree):
    """Check transform.root_attach on the discontinuous sample tree.

    Preorder labels and terminal words of the returned tree are compared
    with testdata.  Calling transform.boyd_split on that tree is then
    expected to raise ValueError.
    """
    attached = transform.root_attach(discont_tree)
    labels = [node.data['label'] for node in trees.preorder(attached)]
    ordered_words = [leaf.data['word']
                     for leaf in trees.terminals(attached)]
    unordered_words = [leaf.data['word']
                       for leaf in trees.unordered_terminals(attached)]
    assert labels == testdata.DISCONT_LABELS_PREORDER
    assert ordered_words == testdata.WORDS
    assert set(unordered_words) == set(testdata.WORDS)
    with pytest.raises(ValueError):
        transform.boyd_split(attached)
# Example #11
# 0
def test_cont_general(cont_tree):
    """Run general checks on the continuous sample tree.

    Verifies terminal numbering, the terminal and node counts, the
    preorder label sequence and the terminal words against testdata.
    """
    ordered = trees.terminals(cont_tree)
    unordered = trees.unordered_terminals(cont_tree)
    preorder_nodes = list(trees.preorder(cont_tree))
    preorder_labels = [node.data['label'] for node in preorder_nodes]
    ordered_words = [leaf.data['word'] for leaf in ordered]
    unordered_words = [leaf.data['word'] for leaf in unordered]
    assert all('num' in leaf.data for leaf in ordered)
    assert all(leaf in unordered for leaf in ordered)
    assert len(ordered) == 9
    assert len(unordered) == 9
    assert len(preorder_nodes) == 15
    assert preorder_labels == testdata.CONT_LABELS_PREORDER
    assert ordered_words == testdata.WORDS
    assert set(unordered_words) == set(testdata.WORDS)
# Example #12
# 0
def test_discont_output(discont_tree):
    """Check the export and tigerxml writers in treeoutput.

    The export output is compared field by field and the tigerxml output
    line by line with the samples in testdata.  Both comparisons pair
    items with zip(), which stops at the shorter input, so surplus lines
    or fields on either side are not compared.
    """
    # export: check if all fields are the same
    export_buffer = StringIO()
    treeoutput.export(discont_tree, export_buffer)
    produced_lines = export_buffer.getvalue().split('\n')
    sample_lines = testdata.SAMPLE_EXPORT.split('\n')
    for produced, sample in zip(produced_lines, sample_lines):
        for produced_field, sample_field in zip(produced.split(),
                                                sample.split()):
            assert produced_field == sample_field

    # export numbering: node numbers in preorder
    treeoutput.compute_export_numbering(discont_tree)
    node_numbers = [node.data['num']
                    for node in trees.preorder(discont_tree)]
    assert node_numbers == testdata.DISCONT_EXPORT_NUMBERING

    # tigerxml: check linewise if output is the same as sample
    xml_buffer = StringIO()
    treeoutput.tigerxml(discont_tree, xml_buffer)
    produced_xml = xml_buffer.getvalue()
    # The first three and last three lines of the sample are dropped
    # before the comparison.
    sample_xml = '\n'.join(testdata.SAMPLE_TIGERXML.split('\n')[3:-3])
    for produced, sample in zip(produced_xml.split('\n'),
                                sample_xml.split('\n')):
        assert produced == sample
# Example #13
# 0
def test_labels(cont_tree):
    """Check label parsing/formatting and trees.replace_chars.

    Each label string is parsed with trees.parse_label, the parsed
    fields are checked, and trees.format_label must reproduce the input
    string.  Afterwards trees.replace_chars is applied to cont_tree
    twice: once with a string label containing replaceable characters
    and once with a non-string label.
    """
    # Empty label: all fields take their defaults.
    label = ""
    e = trees.parse_label(label)
    assert e.label == trees.DEFAULT_LABEL
    assert e.gf == trees.DEFAULT_EDGE
    assert e.gf_separator == trees.DEFAULT_GF_SEPARATOR
    assert e.coindex == ""
    assert e.gapindex == ""
    assert not e.headmarker
    assert not e.is_trace
    olabel = trees.format_label(e)
    assert olabel == label

    # "-NONE-" is kept as the label and is not flagged as a trace.
    label = "-NONE-"
    e = trees.parse_label(label)
    assert e.label == "-NONE-"
    assert not e.is_trace
    olabel = trees.format_label(e)
    assert olabel == label

    # Trailing "-2" is read as the coindex; the rest after the first
    # separator is the grammatical function.
    label = "A--A=1---2"
    e = trees.parse_label(label)
    assert e.label == "A"
    assert e.gf == "-A=1--"
    assert e.coindex == "2"
    assert not e.is_trace
    olabel = trees.format_label(e)
    assert olabel == label

    # Trailing "=2" is read as the gap index; no coindex is set.
    label = "A--A-1--=2"
    e = trees.parse_label(label)
    assert e.label == "A"
    assert e.gf == "-A-1--"
    assert e.coindex == ""
    assert e.gapindex == "2"
    assert not e.is_trace
    olabel = trees.format_label(e)
    assert olabel == label

    # Label with gap index and head marker that is flagged as a trace.
    label = "*LAB*-GF=1'"
    e = trees.parse_label(label)
    assert e.label == "*LAB*"
    assert e.gf == "GF"
    assert e.gapindex == "1"
    assert e.headmarker
    assert e.is_trace
    olabel = trees.format_label(e)
    assert olabel == label

    # replace_chars with a string label: the listed characters in the
    # label of cont_tree (first entry in preorder) must be substituted,
    # all other labels must stay as they were.
    cands = {"(": "X", "{": "Y", "]": "Z"}
    cont_tree_labels = [node.data['label'] for node
                        in trees.preorder(cont_tree)]
    cont_tree.data['label'] = "A(B{C]D"
    trees.replace_chars(cont_tree, cands)
    cont_tree_labels_goal = list(cont_tree_labels)
    cont_tree_labels_goal[0] = "AXBYCZD"
    cont_tree_labels_new = [node.data['label'] for node
                            in trees.preorder(cont_tree)]
    # This result was previously overwritten below without ever being
    # checked.
    assert cont_tree_labels_new == cont_tree_labels_goal

    # replace_chars with a non-string label: the value must be left
    # untouched.
    cont_tree.data['label'] = 10
    cont_tree_labels_goal[0] = 10
    trees.replace_chars(cont_tree, cands)
    cont_tree_labels_new = [node.data['label'] for node
                            in trees.preorder(cont_tree)]
    assert cont_tree_labels_new == cont_tree_labels_goal