def test_conll_as_asciitree_nontree_erased():
    """With include_erased=True, erased tokens ('with', 'but', 'not') must
    show up in the ascii tree under a synthetic ROOT node."""
    sample_deps = """
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
""".strip().splitlines()
    sentence = Sentence.from_stanford_dependencies(sample_deps, tree4,
                                                   include_erased=True)
    assert len(sentence) == 9
    # NOTE(review): SOURCE arrived with all whitespace collapsed; the internal
    # indentation of the expected tree below was reconstructed from typical
    # asciitree output -- confirm the exact spacing against a live run.
    expected = """
ROOT [ROOT-DEPREL]
 +-- burrito [root]
 |   +-- A [det]
 |   +-- beans [prep_with]
 |   |   +-- chicken [conj_negcc]
 |   +-- chicken [prep_with]
 |   +-- . [punct]
 +-- with [erased]
 +-- but [erased]
 +-- not [erased]
""".strip()
    assert sentence.as_asciitree().strip() == expected
def test_conll_as_dotgraph_nontree():
    """as_dotgraph() should emit one node per token and one edge per
    dependency relation, including the non-tree (reentrant) chicken-7 edges."""
    if older_than_py27:
        # this feature disabled in older Pythons
        return
    sample_deps = """
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
""".strip().splitlines()
    sentence = Sentence.from_stanford_dependencies(sample_deps, tree4)
    assert len(sentence) == 6
    # NOTE(review): whitespace in SOURCE was collapsed; the expected graphviz
    # source below is reconstructed with tab indentation (the python graphviz
    # package's default) -- verify against actual as_dotgraph().source output.
    expected = """
digraph {
\t0 [label=root]
\t1 [label=A]
\t2 -> 1 [label=det]
\t2 [label=burrito]
\t0 -> 2 [label=root]
\t4 [label=beans]
\t2 -> 4 [label=prep_with]
\t7 [label=chicken]
\t2 -> 7 [label=prep_with]
\t4 -> 7 [label=conj_negcc]
\t8 [label="."]
\t2 -> 8 [label=punct]
}
""".strip()
    assert sentence.as_dotgraph().source == expected
# NOTE(review): this function originally reused the name
# test_read_sd_sentence_punct, which is redefined later in the file; the
# later definition silently shadowed this one so pytest never collected it.
# Renamed so both copies run -- they appear functionally identical, so
# consider deleting one of them instead.
def test_read_sd_sentence_punct_duplicate():
    """Punctuation handling: include_punct / include_erased flags, plus
    tolerance for varied phrase-structure tree formatting (duplicate copy)."""
    sample_deps = """
root(ROOT-0, Sentences-1)
punct(Sentences-1, :-2)
dep(Sentences-1, words-3)
punct(sometimes-8, -LRB--4)
prep(sometimes-8, with-5)
pobj(with-5, punctuation-6)
punct(sometimes-8, ---7)
dep(words-3, sometimes-8)
punct(sometimes-8, -RRB--9)
punct(Sentences-1, .-10)
""".strip().splitlines()
    tree = """(ROOT (NP (NP (NNS Sentences)) (: :) (NP (NP (NNS words)) (PRN (-LRB- -LRB-) (FRAG (PP (IN with) (NP (NN punctuation))) (: --) (ADVP (RB sometimes))) (-RRB- -RRB-))) (. .)))"""
    output = """
Token(index=1, form='Sentences', cpos='NNS', pos='NNS', head=0, deprel='root')
Token(index=2, form=':', cpos=':', pos=':', head=1, deprel='punct')
Token(index=3, form='words', cpos='NNS', pos='NNS', head=1, deprel='dep')
Token(index=4, form='-LRB-', cpos='-LRB-', pos='-LRB-', head=8, deprel='punct')
Token(index=5, form='with', cpos='IN', pos='IN', head=8, deprel='prep')
Token(index=6, form='punctuation', cpos='NN', pos='NN', head=5, deprel='pobj')
Token(index=7, form='--', cpos=':', pos=':', head=8, deprel='punct')
Token(index=8, form='sometimes', cpos='RB', pos='RB', head=3, deprel='dep')
Token(index=9, form='-RRB-', cpos='-RRB-', pos='-RRB-', head=8, deprel='punct')
Token(index=10, form='.', cpos='.', pos='.', head=1, deprel='punct')
""".strip()
    sentence = Sentence.from_stanford_dependencies(sample_deps, tree)
    assert stringify_sentence(sentence) == output
    output_no_punct = """
Token(index=1, form='Sentences', cpos='NNS', pos='NNS', head=0, deprel='root')
Token(index=3, form='words', cpos='NNS', pos='NNS', head=1, deprel='dep')
Token(index=5, form='with', cpos='IN', pos='IN', head=8, deprel='prep')
Token(index=6, form='punctuation', cpos='NN', pos='NN', head=5, deprel='pobj')
Token(index=8, form='sometimes', cpos='RB', pos='RB', head=3, deprel='dep')
""".strip()
    sentence2 = Sentence.from_stanford_dependencies(sample_deps, tree,
                                                    include_punct=False)
    assert stringify_sentence(sentence2) == output_no_punct
    # include_erased must not change anything when nothing was erased
    sentence3 = Sentence.from_stanford_dependencies(sample_deps, tree,
                                                    include_punct=False,
                                                    include_erased=True)
    assert stringify_sentence(sentence3) == output_no_punct
    sentence4 = Sentence.from_stanford_dependencies(sample_deps, tree,
                                                    include_punct=True,
                                                    include_erased=True)
    assert stringify_sentence(sentence4) == output
    # same tree, compact form with function tags (NP-SBJ) -- tags are ignored
    tree2 = "(ROOT(NP(NP-SBJ(NNS Sentences))(: :)(NP(NP(NNS words))(PRN(-LRB- -LRB-)(FRAG(PP(IN with)(NP(NN punctuation)))(: --)(ADVP(RB sometimes)))(-RRB- -RRB-)))(. .)))"
    sentence5 = Sentence.from_stanford_dependencies(sample_deps, tree2)
    assert sentence5 == sentence
    # same tree with an unlabeled top node
    tree3 = "((NP(NP(NNS Sentences))(: :)(NP(NP(NNS words))(PRN(-LRB- -LRB-)(FRAG(PP(IN with)(NP(NN punctuation)))(: --)(ADVP(RB sometimes)))(-RRB- -RRB-)))(. .)))"
    sentence6 = Sentence.from_stanford_dependencies(sample_deps, tree3)
    assert sentence6 == sentence
    # same tree with messy mixed whitespace (spaces, tabs, newlines)
    tree4 = " ( ROOT(NP ( NP-SBJ (NNS Sentences))(: :)(\nNP\n(NP(NNS\nwords)) (PRN (-LRB- -LRB-)(FRAG(PP (IN with\n)\t(NP(NN punctuation )))(: --)(ADVP( RB sometimes )))(-RRB- \t-RRB-)))(.\n\n\t.))) "
    sentence7 = Sentence.from_stanford_dependencies(sample_deps, tree4)
    assert sentence7 == sentence
def test_read_sd_sentence():
    """Reading a CCprocessed dependency listing should reproduce the
    canonical stringified form for tree4."""
    sample_deps = """
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
""".strip().splitlines()
    sentence = Sentence.from_stanford_dependencies(sample_deps, tree4)
    assert stringify_sentence(sentence) == tree4_out_CCprocessed
def test_read_sd_sentence_sorting():
    """Same as test_read_sd_sentence, but the relations are deliberately
    listed out of order: the reader must sort tokens by index."""
    sample_deps = '''
punct(burrito-2, .-8)
conj_negcc(beans-4, chicken-7)
prep_with(burrito-2, chicken-7)
prep_with(burrito-2, beans-4)
root(ROOT-0, burrito-2)
det(burrito-2, A-1)
'''.strip().splitlines()
    sentence = Sentence.from_stanford_dependencies(sample_deps, trees_sd.tree4)
    assert stringify_sentence(sentence) == trees_sd.tree4_out_CCprocessed
def test_read_sd_sentence_extra_space():
    """Dependency input containing extra blank/whitespace lines should be
    tolerated. Note: the literal below is deliberately NOT .strip()ed, so
    splitlines() yields leading/trailing empty entries."""
    sample_deps = '''
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
'''.splitlines()
    sentence = Sentence.from_stanford_dependencies(sample_deps, trees_sd.tree4)
    assert stringify_sentence(sentence) == trees_sd.tree4_out_CCprocessed
def test_conll_as_asciitree_nontree():
    """A non-tree dependency graph (chicken-7 has two heads) should still
    render as an ascii tree, with chicken appearing twice."""
    sample_deps = '''
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
'''.strip().splitlines()
    sentence = Sentence.from_stanford_dependencies(sample_deps, trees_sd.tree4)
    assert len(sentence) == 6
    # NOTE(review): SOURCE arrived with all whitespace collapsed; the internal
    # indentation of the expected tree below was reconstructed from typical
    # asciitree output -- confirm the exact spacing against a live run.
    expected = '''
burrito [root]
 +-- A [det]
 +-- beans [prep_with]
 |   +-- chicken [conj_negcc]
 +-- chicken [prep_with]
 +-- . [punct]
'''.strip()
    assert sentence.as_asciitree().strip() == expected
def test_read_sd_sentence_punct():
    """Punctuation handling: include_punct / include_erased flags, plus
    tolerance for varied phrase-structure tree formatting."""
    sample_deps = '''
root(ROOT-0, Sentences-1)
punct(Sentences-1, :-2)
dep(Sentences-1, words-3)
punct(sometimes-8, -LRB--4)
prep(sometimes-8, with-5)
pobj(with-5, punctuation-6)
punct(sometimes-8, ---7)
dep(words-3, sometimes-8)
punct(sometimes-8, -RRB--9)
punct(Sentences-1, .-10)
'''.strip().splitlines()
    tree = '''(ROOT (NP (NP (NNS Sentences)) (: :) (NP (NP (NNS words)) (PRN (-LRB- -LRB-) (FRAG (PP (IN with) (NP (NN punctuation))) (: --) (ADVP (RB sometimes))) (-RRB- -RRB-))) (. .)))'''
    output = '''
Token(index=1, form='Sentences', cpos='NNS', pos='NNS', head=0, deprel='root')
Token(index=2, form=':', cpos=':', pos=':', head=1, deprel='punct')
Token(index=3, form='words', cpos='NNS', pos='NNS', head=1, deprel='dep')
Token(index=4, form='-LRB-', cpos='-LRB-', pos='-LRB-', head=8, deprel='punct')
Token(index=5, form='with', cpos='IN', pos='IN', head=8, deprel='prep')
Token(index=6, form='punctuation', cpos='NN', pos='NN', head=5, deprel='pobj')
Token(index=7, form='--', cpos=':', pos=':', head=8, deprel='punct')
Token(index=8, form='sometimes', cpos='RB', pos='RB', head=3, deprel='dep')
Token(index=9, form='-RRB-', cpos='-RRB-', pos='-RRB-', head=8, deprel='punct')
Token(index=10, form='.', cpos='.', pos='.', head=1, deprel='punct')
'''.strip()
    sentence = Sentence.from_stanford_dependencies(sample_deps, tree)
    assert stringify_sentence(sentence) == output
    output_no_punct = '''
Token(index=1, form='Sentences', cpos='NNS', pos='NNS', head=0, deprel='root')
Token(index=3, form='words', cpos='NNS', pos='NNS', head=1, deprel='dep')
Token(index=5, form='with', cpos='IN', pos='IN', head=8, deprel='prep')
Token(index=6, form='punctuation', cpos='NN', pos='NN', head=5, deprel='pobj')
Token(index=8, form='sometimes', cpos='RB', pos='RB', head=3, deprel='dep')
'''.strip()
    sentence2 = Sentence.from_stanford_dependencies(sample_deps, tree,
                                                    include_punct=False)
    assert stringify_sentence(sentence2) == output_no_punct
    # include_erased must not change anything when nothing was erased
    sentence3 = Sentence.from_stanford_dependencies(sample_deps, tree,
                                                    include_punct=False,
                                                    include_erased=True)
    assert stringify_sentence(sentence3) == output_no_punct
    sentence4 = Sentence.from_stanford_dependencies(sample_deps, tree,
                                                    include_punct=True,
                                                    include_erased=True)
    assert stringify_sentence(sentence4) == output
    # same tree, compact form with function tags (NP-SBJ) -- tags are ignored
    tree2 = '(ROOT(NP(NP-SBJ(NNS Sentences))(: :)(NP(NP(NNS words))(PRN(-LRB- -LRB-)(FRAG(PP(IN with)(NP(NN punctuation)))(: --)(ADVP(RB sometimes)))(-RRB- -RRB-)))(. .)))'
    sentence5 = Sentence.from_stanford_dependencies(sample_deps, tree2)
    assert sentence5 == sentence
    # same tree with an unlabeled top node
    tree3 = '((NP(NP(NNS Sentences))(: :)(NP(NP(NNS words))(PRN(-LRB- -LRB-)(FRAG(PP(IN with)(NP(NN punctuation)))(: --)(ADVP(RB sometimes)))(-RRB- -RRB-)))(. .)))'
    sentence6 = Sentence.from_stanford_dependencies(sample_deps, tree3)
    assert sentence6 == sentence
    # same tree with messy mixed whitespace (spaces, tabs, newlines)
    tree4 = ' ( ROOT(NP ( NP-SBJ (NNS Sentences))(: :)(\nNP\n(NP(NNS\nwords)) (PRN (-LRB- -LRB-)(FRAG(PP (IN with\n)\t(NP(NN punctuation )))(: --)(ADVP( RB sometimes )))(-RRB- \t-RRB-)))(.\n\n\t.))) '
    sentence7 = Sentence.from_stanford_dependencies(sample_deps, tree4)
    assert sentence7 == sentence