def test_read_sd_corpus_multiple_extra_space():
    """Read a two-sentence dependency listing padded with blank lines.

    The fixture is passed through ``splitlines()`` without being stripped,
    so the empty lines around and between the two groups of dependencies
    are handed to ``Corpus.from_stanford_dependencies`` as-is. The resulting
    corpus must contain two sentences whose ``stringify_sentence`` output
    equals ``trees_sd.tree4_out_CCprocessed`` and
    ``trees_sd.tree5_out_CCprocessed`` respectively.
    """
    # NOTE(review): the exact number of blank lines in this fixture is a
    # reconstruction -- confirm against the intended fixture layout.
    dependency_text = '''

det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)


nsubj(cooks-2, Ed-1)
nsubj(sells-4, Ed-1)
root(ROOT-0, cooks-2)
conj_and(cooks-2, sells-4)
dobj(cooks-2, burritos-5)
prep_with(burritos-5, beans-7)
prep_with(burritos-5, rice-10)
conj_negcc(beans-7, rice-10)
punct(cooks-2, .-11)


'''
    dependency_lines = dependency_text.splitlines()
    source_trees = [trees_sd.tree4, trees_sd.tree5]

    parsed = Corpus.from_stanford_dependencies(dependency_lines, source_trees)

    assert len(parsed) == 2
    # Check the sentences in order, one assert per sentence.
    first_sentence = parsed[0]
    assert stringify_sentence(first_sentence) == trees_sd.tree4_out_CCprocessed
    second_sentence = parsed[1]
    assert stringify_sentence(second_sentence) == trees_sd.tree5_out_CCprocessed
def test_read_sd_corpus_multiple_extra_space():
    """Check that blank-line padding does not disturb sentence splitting.

    Two groups of Stanford-dependency lines are fed, unstripped, to
    ``Corpus.from_stanford_dependencies`` together with ``trees_sd.tree4``
    and ``trees_sd.tree5``. The test expects exactly two sentences, whose
    ``stringify_sentence`` forms equal the matching
    ``trees_sd.tree*_out_CCprocessed`` values.
    """
    # NOTE(review): a function with this exact name is also defined earlier
    # in this file; the later definition replaces the earlier one at import
    # time, so only one of them is collected -- confirm which is intended.
    # NOTE(review): the number of blank lines in the fixture below is a
    # reconstruction -- verify against the intended fixture layout.
    padded_listing = '''

det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)


nsubj(cooks-2, Ed-1)
nsubj(sells-4, Ed-1)
root(ROOT-0, cooks-2)
conj_and(cooks-2, sells-4)
dobj(cooks-2, burritos-5)
prep_with(burritos-5, beans-7)
prep_with(burritos-5, rice-10)
conj_negcc(beans-7, rice-10)
punct(cooks-2, .-11)


'''
    listing_lines = padded_listing.splitlines()

    result = Corpus.from_stanford_dependencies(
        listing_lines,
        [trees_sd.tree4, trees_sd.tree5],
    )

    sentence_count = len(result)
    assert sentence_count == 2
    assert stringify_sentence(result[0]) == trees_sd.tree4_out_CCprocessed
    assert stringify_sentence(result[1]) == trees_sd.tree5_out_CCprocessed
def test_read_sd_corpus_single():
    """Read a single sentence's dependencies into a one-element corpus.

    The fixture is stripped and split into lines, then passed to
    ``Corpus.from_stanford_dependencies`` with ``trees_sd.tree4``. The corpus
    must hold one sentence whose ``stringify_sentence`` output equals
    ``trees_sd.tree4_out_CCprocessed``.
    """
    dependency_text = '''
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
'''
    # strip() drops the newlines that frame the literal before splitting.
    dependency_lines = dependency_text.strip().splitlines()
    source_trees = [trees_sd.tree4]

    parsed = Corpus.from_stanford_dependencies(dependency_lines, source_trees)

    assert len(parsed) == 1
    only_sentence = parsed[0]
    assert stringify_sentence(only_sentence) == trees_sd.tree4_out_CCprocessed
def test_read_sd_corpus_single():
    """Build a corpus from one sentence's dependency lines and verify it.

    The stripped fixture lines and ``tree4`` go to
    ``Corpus.from_stanford_dependencies``; the test then expects a corpus of
    length one whose sentence stringifies to ``tree4_out_CCprocessed``.
    """
    # NOTE(review): a function with this exact name is also defined earlier
    # in this file; the later definition replaces the earlier one at import
    # time -- confirm which is intended.
    listing = """
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
"""
    # Remove the framing newlines of the literal, then split per dependency.
    listing_lines = listing.strip().splitlines()

    result = Corpus.from_stanford_dependencies(listing_lines, [tree4])

    sentence_count = len(result)
    assert sentence_count == 1
    assert stringify_sentence(result[0]) == tree4_out_CCprocessed