def test_conll_readwrite_sentence_extra_whitespace():
    """Round-trip ``conll_example2`` with extra empty lines around the input.

    Every padded variant is expected to serialize back to the stripped
    example text.
    """
    expected = conll_example2.strip()
    body = conll_example2.splitlines()
    padded_variants = (
        body + ['', '', ''],         # trailing blanks only
        ['', ''] + body + ['', ''],  # blanks on both sides
        [''] + body,                 # leading blank only
    )
    for conll_lines in padded_variants:
        sentence = Sentence.from_conll(conll_lines)
        assert sentence.as_conll() == expected
def test_conll_readwrite_sentence_extra_whitespace():
    """Check that empty lines before/after the CoNLL rows survive a round trip.

    The serialized sentence must equal ``conll_example2.strip()`` for each
    combination of leading and trailing empty lines.
    """
    def roundtrip(leading, trailing):
        # Surround the example rows with the requested number of '' entries.
        rows = [''] * leading + conll_example2.splitlines() + [''] * trailing
        return Sentence.from_conll(rows).as_conll()

    assert roundtrip(0, 3) == conll_example2.strip()
    assert roundtrip(2, 2) == conll_example2.strip()
    assert roundtrip(1, 0) == conll_example2.strip()
def test_conll_as_dotgraph_custom_nodeformat():
    """Render ``conll_example`` as a dot graph with a custom node formatter.

    The formatter returns a red colour for a ``None`` token (the ``root``
    node in the expected output), green for cpos ``N``, blue for cpos ``V``
    and no extra attributes otherwise.
    """
    if older_than_py27:  # this feature disabled in older Pythons
        return

    def node_formatter(token):
        # Checked in order: the None token first, then the coarse POS tag.
        if token is None:
            return {"color": "red"}
        if token.cpos == "N":
            return {"color": "green"}
        if token.cpos == "V":
            return {"color": "blue"}
        return {}

    expected_source = """
digraph {
	0 [label=root color=red]
	1 [label=Cathy color=green]
		2 -> 1 [label=su]
	2 [label=zag color=blue]
		0 -> 2 [label=ROOT]
	3 [label=hen]
		2 -> 3 [label=obj1]
	4 [label=wild]
		5 -> 4 [label=mod]
	5 [label=zwaaien color=green]
		2 -> 5 [label=vc]
	6 [label="."]
		5 -> 6 [label=punct]
}
""".strip()

    graph = Sentence.from_conll(conll_example.splitlines()).as_dotgraph(
        node_formatter=node_formatter
    )
    assert graph.source == expected_source
def test_conll_as_dotgraph_nontree():
    """Dot-graph output for dependencies where a token has two heads.

    ``chicken-7`` is a dependent of both ``burrito-2`` and ``beans-4``; the
    expected graph source lists both incoming edges.
    """
    if older_than_py27:  # this feature disabled in older Pythons
        return

    expected_source = """
digraph {
	0 [label=root]
	1 [label=A]
		2 -> 1 [label=det]
	2 [label=burrito]
		0 -> 2 [label=root]
	4 [label=beans]
		2 -> 4 [label=prep_with]
	7 [label=chicken]
		2 -> 7 [label=prep_with]
		4 -> 7 [label=conj_negcc]
	8 [label="."]
		2 -> 8 [label=punct]
}
""".strip()

    dependency_lines = """
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
    """.strip().splitlines()

    sentence = Sentence.from_stanford_dependencies(dependency_lines, tree4)
    assert len(sentence) == 6
    assert sentence.as_dotgraph().source == expected_source
def test_conll_as_dotgraph_custom_edgeformat():
    """Render ``conll_example`` as a dot graph with a custom edge formatter.

    Edges of tokens whose head is 0 are coloured red, all others blue.
    """
    if older_than_py27:  # this feature disabled in older Pythons
        return

    def edge_formatter(token):
        colour = "red" if token.head == 0 else "blue"
        return {"color": colour}

    expected_source = """
digraph {
	0 [label=root]
	1 [label=Cathy]
		2 -> 1 [label=su color=blue]
	2 [label=zag]
		0 -> 2 [label=ROOT color=red]
	3 [label=hen]
		2 -> 3 [label=obj1 color=blue]
	4 [label=wild]
		5 -> 4 [label=mod color=blue]
	5 [label=zwaaien]
		2 -> 5 [label=vc color=blue]
	6 [label="."]
		5 -> 6 [label=punct color=blue]
}
""".strip()

    parsed = Sentence.from_conll(conll_example.splitlines())
    graph = parsed.as_dotgraph(edge_formatter=edge_formatter)
    assert graph.source == expected_source
def test_conll_as_dotgraph_custom_edgeformat():
    """Check edge attributes supplied through ``edge_formatter``.

    The formatter marks the edge of any token with ``head == 0`` red and
    every other edge blue; the graph source must contain those colours.
    """
    if older_than_py27:  # this feature disabled in older Pythons
        return

    def edge_formatter(token):
        # Guard clause for the token attached to head 0.
        if token.head == 0:
            return {'color': 'red'}
        return {'color': 'blue'}

    wanted = '''
digraph {
	0 [label=root]
	1 [label=Cathy]
		2 -> 1 [label=su color=blue]
	2 [label=zag]
		0 -> 2 [label=ROOT color=red]
	3 [label=hen]
		2 -> 3 [label=obj1 color=blue]
	4 [label=wild]
		5 -> 4 [label=mod color=blue]
	5 [label=zwaaien]
		2 -> 5 [label=vc color=blue]
	6 [label="."]
		5 -> 6 [label=punct color=blue]
}
'''.strip()

    conll_lines = conll_example.splitlines()
    rendered = Sentence.from_conll(conll_lines).as_dotgraph(
        edge_formatter=edge_formatter)
    assert rendered.source == wanted
def test_conll_as_asciitree_nontree_erased():
    """ASCII-tree output when erased tokens are included.

    With ``include_erased=True`` the sentence has 9 tokens and the expected
    tree shows ``with``, ``but`` and ``not`` labelled ``erased``.
    """
    expected_tree = """
 ROOT [ROOT-DEPREL]
  +-- burrito [root]
  |  +-- A [det]
  |  +-- beans [prep_with]
  |  |  +-- chicken [conj_negcc]
  |  +-- chicken [prep_with]
  |  +-- . [punct]
  +-- with [erased]
  +-- but [erased]
  +-- not [erased]
""".strip()

    dependency_lines = """
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
    """.strip().splitlines()

    sentence = Sentence.from_stanford_dependencies(
        dependency_lines, tree4, include_erased=True
    )
    assert len(sentence) == 9
    rendered = sentence.as_asciitree().strip()
    assert rendered == expected_tree
def test_conll_as_asciitree_nontree_erased():
    """Check the ASCII tree built from ``trees_sd.tree4`` with erased tokens.

    The sentence is read with ``include_erased=True``; it must contain 9
    tokens and render the three ``[erased]`` entries shown below.
    """
    wanted = '''
 ROOT [ROOT-DEPREL]
  +-- burrito [root]
  |  +-- A [det]
  |  +-- beans [prep_with]
  |  |  +-- chicken [conj_negcc]
  |  +-- chicken [prep_with]
  |  +-- . [punct]
  +-- with [erased]
  +-- but [erased]
  +-- not [erased]
'''.strip()

    deps = '''
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
    '''.strip().splitlines()

    sentence = Sentence.from_stanford_dependencies(deps, trees_sd.tree4,
                                                   include_erased=True)
    assert len(sentence) == 9
    assert sentence.as_asciitree().strip() == wanted
def test_conll_as_dotgraph_nontree():
    """Check the dot graph for dependencies that do not form a tree.

    ``chicken-7`` appears as a dependent of two different heads, so the
    expected source contains two edges pointing at node 7.
    """
    if older_than_py27:  # this feature disabled in older Pythons
        return

    wanted = '''
digraph {
	0 [label=root]
	1 [label=A]
		2 -> 1 [label=det]
	2 [label=burrito]
		0 -> 2 [label=root]
	4 [label=beans]
		2 -> 4 [label=prep_with]
	7 [label=chicken]
		2 -> 7 [label=prep_with]
		4 -> 7 [label=conj_negcc]
	8 [label="."]
		2 -> 8 [label=punct]
}
'''.strip()

    deps = '''
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
    '''.strip().splitlines()

    sentence = Sentence.from_stanford_dependencies(deps, trees_sd.tree4)
    assert len(sentence) == 6
    graph = sentence.as_dotgraph()
    assert graph.source == wanted
def test_conll_as_dotgraph_custom_nodeformat():
    """Check node attributes supplied through ``node_formatter``.

    A ``None`` token gets red, cpos ``N`` green, cpos ``V`` blue and any
    other token no extra attributes; the graph source must reflect that.
    """
    if older_than_py27:  # this feature disabled in older Pythons
        return

    def node_formatter(token):
        # Early returns, evaluated in the same order as the colour rules.
        if token is None:
            return {'color': 'red'}
        if token.cpos == 'N':
            return {'color': 'green'}
        if token.cpos == 'V':
            return {'color': 'blue'}
        return {}

    wanted = '''
digraph {
	0 [label=root color=red]
	1 [label=Cathy color=green]
		2 -> 1 [label=su]
	2 [label=zag color=blue]
		0 -> 2 [label=ROOT]
	3 [label=hen]
		2 -> 3 [label=obj1]
	4 [label=wild]
		5 -> 4 [label=mod]
	5 [label=zwaaien color=green]
		2 -> 5 [label=vc]
	6 [label="."]
		5 -> 6 [label=punct]
}
'''.strip()

    parsed = Sentence.from_conll(conll_example.splitlines())
    rendered = parsed.as_dotgraph(node_formatter=node_formatter)
    assert rendered.source == wanted
def test_read_sd_sentence_punct():
    """Read Stanford Dependencies that contain punctuation tokens.

    Covers the ``include_punct`` / ``include_erased`` flag combinations and
    then checks that tree strings differing from the first one in
    whitespace and some node labels produce an equal Sentence.
    """
    sample_deps = """
root(ROOT-0, Sentences-1)
punct(Sentences-1, :-2)
dep(Sentences-1, words-3)
punct(sometimes-8, -LRB--4)
prep(sometimes-8, with-5)
pobj(with-5, punctuation-6)
punct(sometimes-8, ---7)
dep(words-3, sometimes-8)
punct(sometimes-8, -RRB--9)
punct(Sentences-1, .-10)
    """.strip().splitlines()
    tree = """(ROOT (NP (NP (NNS Sentences)) (: :) (NP (NP (NNS words))
    (PRN (-LRB- -LRB-) (FRAG (PP (IN with) (NP (NN punctuation))) (: --)
    (ADVP (RB sometimes))) (-RRB- -RRB-))) (. .)))"""
    output = """
Token(index=1, form='Sentences', cpos='NNS', pos='NNS', head=0, deprel='root')
Token(index=2, form=':', cpos=':', pos=':', head=1, deprel='punct')
Token(index=3, form='words', cpos='NNS', pos='NNS', head=1, deprel='dep')
Token(index=4, form='-LRB-', cpos='-LRB-', pos='-LRB-', head=8, deprel='punct')
Token(index=5, form='with', cpos='IN', pos='IN', head=8, deprel='prep')
Token(index=6, form='punctuation', cpos='NN', pos='NN', head=5, deprel='pobj')
Token(index=7, form='--', cpos=':', pos=':', head=8, deprel='punct')
Token(index=8, form='sometimes', cpos='RB', pos='RB', head=3, deprel='dep')
Token(index=9, form='-RRB-', cpos='-RRB-', pos='-RRB-', head=8, deprel='punct')
Token(index=10, form='.', cpos='.', pos='.', head=1, deprel='punct')
    """.strip()
    output_no_punct = """
Token(index=1, form='Sentences', cpos='NNS', pos='NNS', head=0, deprel='root')
Token(index=3, form='words', cpos='NNS', pos='NNS', head=1, deprel='dep')
Token(index=5, form='with', cpos='IN', pos='IN', head=8, deprel='prep')
Token(index=6, form='punctuation', cpos='NN', pos='NN', head=5, deprel='pobj')
Token(index=8, form='sometimes', cpos='RB', pos='RB', head=3, deprel='dep')
    """.strip()

    def read(tree_text, **flags):
        # All reads share the same dependency lines; only tree/flags vary.
        return Sentence.from_stanford_dependencies(sample_deps, tree_text, **flags)

    sentence = read(tree)
    assert stringify_sentence(sentence) == output

    without_punct = read(tree, include_punct=False)
    assert stringify_sentence(without_punct) == output_no_punct

    without_punct_erased = read(tree, include_punct=False, include_erased=True)
    assert stringify_sentence(without_punct_erased) == output_no_punct

    with_punct_erased = read(tree, include_punct=True, include_erased=True)
    assert stringify_sentence(with_punct_erased) == output

    tree_variants = (
        # no whitespace between brackets, NP-SBJ label
        "(ROOT(NP(NP-SBJ(NNS Sentences))(: :)(NP(NP(NNS words))(PRN(-LRB- -LRB-)(FRAG(PP(IN with)(NP(NN punctuation)))(: --)(ADVP(RB sometimes)))(-RRB- -RRB-)))(. .)))",
        # no label on the outermost bracket
        "((NP(NP(NNS Sentences))(: :)(NP(NP(NNS words))(PRN(-LRB- -LRB-)(FRAG(PP(IN with)(NP(NN punctuation)))(: --)(ADVP(RB sometimes)))(-RRB- -RRB-)))(. .)))",
        # irregular spaces, tabs and newlines
        " ( ROOT(NP   ( NP-SBJ (NNS Sentences))(: :)(\nNP\n(NP(NNS\nwords)) (PRN (-LRB- -LRB-)(FRAG(PP    (IN with\n)\t(NP(NN punctuation )))(: --)(ADVP( RB sometimes )))(-RRB-    \t-RRB-)))(.\n\n\t.)))    ",
    )
    for variant in tree_variants:
        assert read(variant) == sentence
def test_read_sd_sentence():
    """Read dependencies for ``trees_sd.tree4`` and compare the stringified
    sentence with ``trees_sd.tree4_out_CCprocessed``."""
    dependency_lines = '''
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
    '''.strip().splitlines()
    parsed = Sentence.from_stanford_dependencies(dependency_lines,
                                                 trees_sd.tree4)
    actual = stringify_sentence(parsed)
    assert actual == trees_sd.tree4_out_CCprocessed
def test_conll_as_asciitree():
    """Render ``conll_example`` as an ASCII tree with the default labels.

    Both sides are stripped before comparison, so surrounding whitespace
    (including the leading space of the first line) is ignored.
    """
    expected_tree = """
 zag [ROOT]
  +-- Cathy [su]
  +-- hen [obj1]
  +-- zwaaien [vc]
     +-- wild [mod]
     +-- . [punct]""".strip()

    conll_lines = conll_example.splitlines()
    rendered = Sentence.from_conll(conll_lines).as_asciitree().strip()
    assert rendered == expected_tree
def test_conll_as_asciitree():
    """Check the default ASCII-tree rendering of ``conll_example``.

    The rendered and expected texts are compared after ``strip()``.
    """
    parsed = Sentence.from_conll(conll_example.splitlines())
    got = parsed.as_asciitree().strip()

    wanted = '''
 zag [ROOT]
  +-- Cathy [su]
  +-- hen [obj1]
  +-- zwaaien [vc]
     +-- wild [mod]
     +-- . [punct]'''.strip()
    assert got == wanted
def test_read_sd_sentence():
    """Read dependencies for ``tree4`` and compare the stringified sentence
    with ``tree4_out_CCprocessed``."""
    dependency_lines = """
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
    """.strip().splitlines()
    parsed = Sentence.from_stanford_dependencies(dependency_lines, tree4)
    actual = stringify_sentence(parsed)
    assert actual == tree4_out_CCprocessed
def test_read_sd_sentence_sorting():
    """Same data as test_read_sd_sentence, but with the dependency lines
    listed in reverse order; the stringified result must still equal
    ``trees_sd.tree4_out_CCprocessed``."""
    reversed_deps = '''
punct(burrito-2, .-8)
conj_negcc(beans-4, chicken-7)
prep_with(burrito-2, chicken-7)
prep_with(burrito-2, beans-4)
root(ROOT-0, burrito-2)
det(burrito-2, A-1)
    '''.strip().splitlines()
    parsed = Sentence.from_stanford_dependencies(reversed_deps,
                                                 trees_sd.tree4)
    actual = stringify_sentence(parsed)
    assert actual == trees_sd.tree4_out_CCprocessed
def test_conll_as_asciitree_custom():
    """Render ``conll_example`` as an ASCII tree with a custom ``str_func``.

    Each node is labelled ``deprel:form:cpos`` instead of the default text.
    """
    def str_func(token):
        fields = (token.deprel, token.form, token.cpos)
        return "%s:%s:%s" % fields

    expected_tree = """
ROOT:zag:V
  +--su:Cathy:N
  +--obj1:hen:Pron
  +--vc:zwaaien:N
     +--mod:wild:Adj
     +--punct:.:Punc""".strip()

    parsed = Sentence.from_conll(conll_example.splitlines())
    rendered = parsed.as_asciitree(str_func=str_func).strip()
    assert rendered == expected_tree
def test_conll_as_asciitree_custom():
    """Check ASCII-tree rendering when node text comes from ``str_func``.

    The custom function joins deprel, form and cpos with colons.
    """
    def str_func(token):
        parts = (token.deprel, token.form, token.cpos)
        return '%s:%s:%s' % parts

    parsed = Sentence.from_conll(conll_example.splitlines())
    got = parsed.as_asciitree(str_func=str_func).strip()

    wanted = '''
ROOT:zag:V
  +--su:Cathy:N
  +--obj1:hen:Pron
  +--vc:zwaaien:N
     +--mod:wild:Adj
     +--punct:.:Punc'''.strip()
    assert got == wanted
def test_conll_as_asciitree_nontree():
    """ASCII-tree output for dependencies where a token has two heads.

    ``chicken`` is expected to be printed twice: once under ``beans`` and
    once directly under ``burrito``.
    """
    expected_tree = '''
 burrito [root]
  +-- A [det]
  +-- beans [prep_with]
  |  +-- chicken [conj_negcc]
  +-- chicken [prep_with]
  +-- . [punct]
'''.strip()

    dependency_lines = '''
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
    '''.strip().splitlines()

    sentence = Sentence.from_stanford_dependencies(dependency_lines,
                                                   trees_sd.tree4)
    assert len(sentence) == 6
    rendered = sentence.as_asciitree().strip()
    assert rendered == expected_tree
def test_conll_as_asciitree_nontree():
    """Check the ASCII tree for a dependency set that is not a strict tree.

    The sentence must have 6 tokens, and ``chicken`` shows up under both of
    its heads in the expected rendering.
    """
    deps = '''
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
    '''.strip().splitlines()
    wanted = '''
 burrito [root]
  +-- A [det]
  +-- beans [prep_with]
  |  +-- chicken [conj_negcc]
  +-- chicken [prep_with]
  +-- . [punct]
'''.strip()

    sentence = Sentence.from_stanford_dependencies(deps, trees_sd.tree4)
    assert len(sentence) == 6
    assert sentence.as_asciitree().strip() == wanted
def test_conll_as_dotgraph_custom_digraph_and_idprefix():
    """Render ``conll_example`` with a node-id prefix and a named digraph.

    ``id_prefix="x"`` is expected to prefix every node id, and
    ``digraph_kwargs={"name": "test"}`` to produce ``digraph test {``.
    """
    if older_than_py27:  # this feature disabled in older Pythons
        return
    formatted_dotgraph = """
digraph test {
	x0 [label=root]
	x1 [label=Cathy]
		x2 -> x1 [label=su]
	x2 [label=zag]
		x0 -> x2 [label=ROOT]
	x3 [label=hen]
		x2 -> x3 [label=obj1]
	x4 [label=wild]
		x5 -> x4 [label=mod]
	x5 [label=zwaaien]
		x2 -> x5 [label=vc]
	x6 [label="."]
		x5 -> x6 [label=punct]
}""".strip()

    sentence = Sentence.from_conll(conll_example.splitlines())
    dotgraph = sentence.as_dotgraph(id_prefix="x", digraph_kwargs={"name": "test"})
    # Without this comparison the test could never fail on wrong output.
    assert dotgraph.source == formatted_dotgraph
def test_conll_as_dotgraph_custom_digraph_and_idprefix():
    """Render ``conll_example`` with a node-id prefix and a named digraph.

    ``id_prefix='x'`` is expected to prefix every node id, and
    ``digraph_kwargs={'name': 'test'}`` to produce ``digraph test {``.
    """
    if older_than_py27:  # this feature disabled in older Pythons
        return
    formatted_dotgraph = '''
digraph test {
	x0 [label=root]
	x1 [label=Cathy]
		x2 -> x1 [label=su]
	x2 [label=zag]
		x0 -> x2 [label=ROOT]
	x3 [label=hen]
		x2 -> x3 [label=obj1]
	x4 [label=wild]
		x5 -> x4 [label=mod]
	x5 [label=zwaaien]
		x2 -> x5 [label=vc]
	x6 [label="."]
		x5 -> x6 [label=punct]
}'''.strip()

    sentence = Sentence.from_conll(conll_example.splitlines())
    dotgraph = sentence.as_dotgraph(id_prefix='x',
                                    digraph_kwargs={'name': 'test'})
    # Without this comparison the test could never fail on wrong output.
    assert dotgraph.source == formatted_dotgraph
def test_conll_readwrite_sentence():
    """Parsing ``conll_example`` and writing it back yields the stripped text."""
    conll_lines = conll_example.splitlines()
    roundtripped = Sentence.from_conll(conll_lines).as_conll()
    assert roundtripped == conll_example.strip()
def test_conll_readwrite_sentence2():
    """Parsing ``conll_example2`` and writing it back yields the stripped text."""
    conll_lines = conll_example2.splitlines()
    roundtripped = Sentence.from_conll(conll_lines).as_conll()
    assert roundtripped == conll_example2.strip()
def test_read_sd_sentence_punct():
    """Check punctuation handling when reading Stanford Dependencies.

    First the ``include_punct`` / ``include_erased`` combinations are
    compared against the expected token listings, then alternative
    spellings of the tree string (different whitespace and some different
    node labels) must yield a Sentence equal to the first one.
    """
    sample_deps = '''
root(ROOT-0, Sentences-1)
punct(Sentences-1, :-2)
dep(Sentences-1, words-3)
punct(sometimes-8, -LRB--4)
prep(sometimes-8, with-5)
pobj(with-5, punctuation-6)
punct(sometimes-8, ---7)
dep(words-3, sometimes-8)
punct(sometimes-8, -RRB--9)
punct(Sentences-1, .-10)
    '''.strip().splitlines()
    tree = '''(ROOT (NP (NP (NNS Sentences)) (: :) (NP (NP (NNS words))
    (PRN (-LRB- -LRB-) (FRAG (PP (IN with) (NP (NN punctuation))) (: --)
    (ADVP (RB sometimes))) (-RRB- -RRB-))) (. .)))'''
    output = '''
Token(index=1, form='Sentences', cpos='NNS', pos='NNS', head=0, deprel='root')
Token(index=2, form=':', cpos=':', pos=':', head=1, deprel='punct')
Token(index=3, form='words', cpos='NNS', pos='NNS', head=1, deprel='dep')
Token(index=4, form='-LRB-', cpos='-LRB-', pos='-LRB-', head=8, deprel='punct')
Token(index=5, form='with', cpos='IN', pos='IN', head=8, deprel='prep')
Token(index=6, form='punctuation', cpos='NN', pos='NN', head=5, deprel='pobj')
Token(index=7, form='--', cpos=':', pos=':', head=8, deprel='punct')
Token(index=8, form='sometimes', cpos='RB', pos='RB', head=3, deprel='dep')
Token(index=9, form='-RRB-', cpos='-RRB-', pos='-RRB-', head=8, deprel='punct')
Token(index=10, form='.', cpos='.', pos='.', head=1, deprel='punct')
    '''.strip()
    output_no_punct = '''
Token(index=1, form='Sentences', cpos='NNS', pos='NNS', head=0, deprel='root')
Token(index=3, form='words', cpos='NNS', pos='NNS', head=1, deprel='dep')
Token(index=5, form='with', cpos='IN', pos='IN', head=8, deprel='prep')
Token(index=6, form='punctuation', cpos='NN', pos='NN', head=5, deprel='pobj')
Token(index=8, form='sometimes', cpos='RB', pos='RB', head=3, deprel='dep')
    '''.strip()

    sentence = Sentence.from_stanford_dependencies(sample_deps, tree)
    assert stringify_sentence(sentence) == output

    # (keyword flags, expected listing), checked in this order.
    flag_cases = [
        ({'include_punct': False}, output_no_punct),
        ({'include_punct': False, 'include_erased': True}, output_no_punct),
        ({'include_punct': True, 'include_erased': True}, output),
    ]
    for flags, expected in flag_cases:
        flagged = Sentence.from_stanford_dependencies(sample_deps, tree,
                                                      **flags)
        assert stringify_sentence(flagged) == expected

    alternative_trees = [
        # no whitespace between brackets, NP-SBJ label
        '(ROOT(NP(NP-SBJ(NNS Sentences))(: :)(NP(NP(NNS words))(PRN(-LRB- -LRB-)(FRAG(PP(IN with)(NP(NN punctuation)))(: --)(ADVP(RB sometimes)))(-RRB- -RRB-)))(. .)))',
        # no label on the outermost bracket
        '((NP(NP(NNS Sentences))(: :)(NP(NP(NNS words))(PRN(-LRB- -LRB-)(FRAG(PP(IN with)(NP(NN punctuation)))(: --)(ADVP(RB sometimes)))(-RRB- -RRB-)))(. .)))',
        # irregular spaces, tabs and newlines
        ' ( ROOT(NP   ( NP-SBJ (NNS Sentences))(: :)(\nNP\n(NP(NNS\nwords)) (PRN (-LRB- -LRB-)(FRAG(PP    (IN with\n)\t(NP(NN punctuation )))(: --)(ADVP( RB sometimes )))(-RRB-    \t-RRB-)))(.\n\n\t.)))    ',
    ]
    for alternative in alternative_trees:
        reparsed = Sentence.from_stanford_dependencies(sample_deps,
                                                       alternative)
        assert reparsed == sentence