Ejemplo n.º 1
0
def test_get_conll_deprel_tags_arbitrary_num_tokens_num_dp_not_found():
    """Check cdt when no parse matches the requested tool.

    The fake is built with t="Other great tool" (presumably the tool
    name recorded on its parses -- confirm against the fake).  Asking
    for "" or the default tool is then expected to give one empty string
    per token, while tool=None with an in-range index gives that parse.
    """
    other_tool = "Other great tool"
    # kept in a short local: the style check fails without it
    default_tool = FakeTokenizationAndDependency.default_tool
    for num_dp in xrange(1, 4):
        for num_tokens in xrange(1, 10):
            fake = FakeTokenizationAndDependency(
                num_tokens, num_dp=num_dp, t=other_tool)
            tokenization = fake.tokenization
            blanks = [""] * num_tokens
            first_parse = [u"edge_" + unicode(tok) + "/0"
                           for tok in xrange(num_tokens)]

            # index 0 without a tool filter: the first parse comes back
            assert cdt(tokenization, 0, None) == first_parse
            # index 0 with a tool filter that matches nothing: all empty
            for tool in (default_tool, ""):
                assert cdt(tokenization, 0, tool) == blanks

            # negative index without a tool filter: all empty
            assert cdt(tokenization, -1, None) == blanks

            for dp_index in xrange(num_dp):
                # every in-range index without a tool: that very parse
                wanted = [u"edge_" + unicode(tok) + "/" + unicode(dp_index)
                          for tok in xrange(num_tokens)]
                assert cdt(tokenization, dp_index, None) == wanted
                # in-range index, but the tool filter matches nothing
                for tool in ("", default_tool):
                    assert cdt(tokenization, dp_index, tool) == blanks

            # index one past the last parse: all empty whatever the tool
            for tool in (None, "", default_tool):
                assert cdt(tokenization, num_dp, tool) == blanks
Ejemplo n.º 2
0
def test_get_conll_deprel_tags_arbitrary_num_tokens_not_found():
    """Check cdt on a fake built with a non-default t and default num_dp.

    Only index 0 with tool=None is expected to return edge labels; any
    tool filter, and any index from 1 up to num_tokens, is expected to
    give one empty string per token.  (Presumably the fake holds a
    single parse when num_dp is not passed -- confirm against the fake.)
    """
    other_tool = "Other great tool"
    default_tool = FakeTokenizationAndDependency.default_tool
    for num_tokens in xrange(1, 10):
        fake = FakeTokenizationAndDependency(num_tokens, t=other_tool)
        tokenization = fake.tokenization
        blanks = [""] * num_tokens
        only_parse = [u"edge_" + unicode(tok) + "/0"
                      for tok in xrange(num_tokens)]

        # index 0: found without a tool, empty with a non-matching tool
        assert cdt(tokenization, 0, None) == only_parse
        for tool in (default_tool, ""):
            assert cdt(tokenization, 0, tool) == blanks

        # negative index gives empties; the token list is still non-empty
        assert cdt(tokenization, -1, None) == blanks
        assert tokenization.tokenList

        # same expectations for index 0 again after the invalid lookup
        assert cdt(tokenization, 0, None) == only_parse
        for tool in ("", default_tool):
            assert cdt(tokenization, 0, tool) == blanks

        # indices 1..num_tokens: all empty whatever the tool
        for bad_index in xrange(1, num_tokens + 1):
            for tool in (None, "", default_tool):
                assert cdt(tokenization, bad_index, tool) == blanks
Ejemplo n.º 3
0
def test_get_conll_deprel_tags_arbitrary_num_tokens_num_dp_found():
    """Check cdt when the default tool is requested on a default fake.

    As asserted below, passing the fake's default tool returns the
    "/0" edge labels for every index tried (in range, -1 is not tried
    with a tool here, and num_dp), while tool=None honours the index
    and tool="" always gives one empty string per token.
    """
    # kept in a short local so the style checks pass
    default_tool = FakeTokenizationAndDependency.default_tool
    for num_dp in xrange(1, 4):
        for num_tokens in xrange(1, 10):
            fake = FakeTokenizationAndDependency(num_tokens, num_dp=num_dp)
            tokenization = fake.tokenization
            blanks = [""] * num_tokens
            first_parse = [u"edge_" + unicode(tok) + "/0"
                           for tok in xrange(num_tokens)]

            # index 0 without a tool filter: the first parse comes back
            assert cdt(tokenization, 0, None) == first_parse
            # index 0 with the default tool: found, same labels
            assert cdt(tokenization, 0, default_tool) == first_parse
            # index 0 with a tool that matches nothing: all empty
            assert cdt(tokenization, 0, "") == blanks

            # negative index without a tool filter: all empty
            assert cdt(tokenization, -1, None) == blanks

            for dp_index in xrange(num_dp):
                # in-range index, non-matching tool: all empty
                assert cdt(tokenization, dp_index, "") == blanks
                # in-range index, no tool: the parse at that index
                wanted = [u"edge_" + unicode(tok) + "/" + unicode(dp_index)
                          for tok in xrange(num_tokens)]
                assert cdt(tokenization, dp_index, None) == wanted
                # in-range index, default tool: the "/0" labels
                assert cdt(tokenization, dp_index, default_tool) == first_parse

            # index one past the last parse, no matching tool: all empty
            for tool in (None, ""):
                assert cdt(tokenization, num_dp, tool) == blanks

            # same index with the default tool still gives "/0" labels
            assert cdt(tokenization, num_dp, default_tool) == first_parse
Ejemplo n.º 4
0
def test_get_conll_deprel_tags_one_tokens_found():
    """Check cdt on a one-token fake for indices 0, -1 and 1.

    With the fake's default tool the single edge label is expected for
    every index; with tool=None only index 0 yields it; with tool=""
    the result is always a single empty string.
    """
    tokenization = FakeTokenizationAndDependency(1).tokenization
    default_tool = FakeTokenizationAndDependency.default_tool
    single_edge = [u"edge_0/0"]
    blank = [""]

    # (index, tool, expected) triples, evaluated top to bottom
    cases = (
        (0, None, single_edge),
        (0, default_tool, single_edge),
        (0, "", blank),
        (-1, None, blank),
        (-1, "", blank),
        (-1, default_tool, single_edge),
        (1, default_tool, single_edge),
        (1, None, blank),
        (1, "", blank),
    )
    for index, tool, expected in cases:
        assert cdt(tokenization, index, tool) == expected
Ejemplo n.º 5
0
def test_get_conll_deprel_tags_zero_tokens():
    """Check that cdt gives an empty list for a zero-token fake.

    Indices -1, 0 and 1 are each tried with tool None, "" and the
    fake's default tool.
    """
    tokenization = FakeTokenizationAndDependency(0).tokenization
    tools = (None, "", FakeTokenizationAndDependency.default_tool)
    for index in (-1, 0, 1):
        for tool in tools:
            assert cdt(tokenization, index, tool) == []