def test_get_conll_deprel_tags_arbitrary_num_tokens_num_dp_not_found():
    """Multiple dependency parses present, requested tool never matches."""
    t = "Other great tool"
    # Short alias so the assertion lines stay within the style checker's
    # line-length limit.
    style_tool = FakeTokenizationAndDependency.default_tool
    for num_dp in xrange(1, 4):
        for num_tokens in xrange(1, 10):
            tokenization = FakeTokenizationAndDependency(
                num_tokens, num_dp=num_dp, t=t).tokenization
            empty = [""] * num_tokens
            # No tool, valid index: the parse at that index is returned.
            assert cdt(tokenization, 0, None) == [
                u"edge_" + unicode(k) + "/0" for k in xrange(num_tokens)]
            # Tools that match no parse yield empty tags at a valid index.
            assert cdt(tokenization, 0, style_tool) == empty
            assert cdt(tokenization, 0, "") == empty
            # Negative index with no tool: empty tags.
            assert cdt(tokenization, -1, None) == empty
            for idx in xrange(num_dp):
                # No tool, valid index: that specific parse comes back.
                assert cdt(tokenization, idx, None) == [
                    u"edge_" + unicode(k) + "/" + unicode(idx)
                    for k in xrange(num_tokens)]
                # Unmatched tools at a valid index: empty tags.
                for tool in ("", style_tool):
                    assert cdt(tokenization, idx, tool) == empty
            # Out-of-range index: empty tags regardless of tool.
            for tool in (None, "", style_tool):
                assert cdt(tokenization, num_dp, tool) == empty
def test_get_conll_deprel_tags_arbitrary_num_tokens_not_found():
    """Single dependency parse present, requested tool never matches."""
    t = "Other great tool"
    style_tool = FakeTokenizationAndDependency.default_tool
    for num_tokens in xrange(1, 10):
        tokenization = FakeTokenizationAndDependency(
            num_tokens, t=t).tokenization
        empty = [""] * num_tokens
        expected = [
            u"edge_" + unicode(k) + "/0" for k in xrange(num_tokens)]
        # No tool, index 0: the lone parse is returned.
        assert cdt(tokenization, 0, None) == expected
        # Tools that match no parse yield empty tags.
        assert cdt(tokenization, 0, style_tool) == empty
        assert cdt(tokenization, 0, "") == empty
        # Negative index with no tool: empty tags.
        assert cdt(tokenization, -1, None) == empty
        # The token list itself is populated.
        assert tokenization.tokenList
        # Repeating the lookup is stable.
        assert cdt(tokenization, 0, None) == expected
        for tool in ("", style_tool):
            assert cdt(tokenization, 0, tool) == empty
        # Any index beyond the single parse is out of range for every tool.
        for idx in xrange(1, num_tokens + 1):
            for tool in (None, "", style_tool):
                assert cdt(tokenization, idx, tool) == empty
def test_get_conll_deprel_tags_arbitrary_num_tokens_num_dp_found():
    """Multiple dependency parses present, default tool matches one of them."""
    # Alias keeps the assertion lines short enough for the style checker.
    style_tool = FakeTokenizationAndDependency.default_tool
    for num_dp in xrange(1, 4):
        for num_tokens in xrange(1, 10):
            tokenization = FakeTokenizationAndDependency(
                num_tokens, num_dp=num_dp).tokenization
            empty = [""] * num_tokens
            tool_parse = [
                u"edge_" + unicode(k) + "/0" for k in xrange(num_tokens)]
            # No tool, valid index: the parse at that index is returned.
            assert cdt(tokenization, 0, None) == tool_parse
            # Matching tool at a valid index: the tool's parse is returned.
            assert cdt(tokenization, 0, style_tool) == tool_parse
            # Non-matching tool: empty tags.
            assert cdt(tokenization, 0, "") == empty
            # Negative index with no tool: empty tags.
            assert cdt(tokenization, -1, None) == empty
            for idx in xrange(num_dp):
                # Non-matching tool at a valid index: empty tags.
                assert cdt(tokenization, idx, "") == empty
                # No tool: the parse at the requested index comes back.
                assert cdt(tokenization, idx, None) == [
                    u"edge_" + unicode(k) + "/" + unicode(idx)
                    for k in xrange(num_tokens)]
                # Matching tool: the tool's parse wins over the index.
                assert cdt(tokenization, idx, style_tool) == tool_parse
            # Out-of-range index: empty unless the tool matches, in which
            # case the tool's parse is still returned.
            for tool in (None, ""):
                assert cdt(tokenization, num_dp, tool) == empty
            assert cdt(tokenization, num_dp, style_tool) == tool_parse
def test_get_conll_deprel_tags_one_tokens_found():
    """Single token, default tool matches the lone dependency parse."""
    tokenization = FakeTokenizationAndDependency(1).tokenization
    style_tool = FakeTokenizationAndDependency.default_tool
    # No tool or matching tool at index 0: the single edge is returned.
    assert cdt(tokenization, 0, None) == [u"edge_0/0"]
    assert cdt(tokenization, 0, style_tool) == [u"edge_0/0"]
    # Non-matching tool: empty tag.
    assert cdt(tokenization, 0, "") == [""]
    # Out-of-range indices: empty unless the tool matches, in which case
    # the matching parse is returned regardless of index.
    for bad_idx in (-1, 1):
        for tool in (None, ""):
            assert cdt(tokenization, bad_idx, tool) == [""]
        assert cdt(tokenization, bad_idx, style_tool) == [u"edge_0/0"]
def test_get_conll_deprel_tags_zero_tokens():
    """With zero tokens the result is empty for every index and tool."""
    tokenization = FakeTokenizationAndDependency(0).tokenization
    tools = (None, "", FakeTokenizationAndDependency.default_tool)
    for idx in xrange(-1, 2):
        for tool in tools:
            assert cdt(tokenization, idx, tool) == []