Example #1
import numpy as np
import pytest

# Assumed import: TokenAligner is provided by the retokenization utilities of
# the project under test (e.g. jiant.utils.retokenize); adjust the path to
# your layout. These imports are shared by the examples below.
from jiant.utils.retokenize import TokenAligner


def test_project_invalid_span():
    src_tokens = ["Members", "of", "the", "House", "clapped", "their", "hands"]
    tgt_tokens = [
        "Members", "Ġof", "Ġthe", "ĠHouse", "Ġcl", "apped", "Ġtheir", "Ġhands"
    ]
    # reference: tgt_token_index = [[0], [1], [2], [3], [4, 5], [6], [7]]
    ta = TokenAligner(src_tokens, tgt_tokens)
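    # An empty span (start == end) cannot be projected, so a ValueError is expected.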
    with pytest.raises(ValueError):
        ta.project_span(0, 0)
Example #2
def test_token_aligner_project_span_last_token_range_is_end_exclusive():
    source_tokens = ["abc", "def", "ghi", "jkl"]
    target_tokens = ["abc", "d", "ef", "ghi", "jkl"]
    ta = TokenAligner(source_tokens, target_tokens)
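    # Source span [3, 4) covers only the last token "jkl", which is target
    # token 4; the projected span has an exclusive end, hence [4, 5).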
    m = ta.project_span(3, 4)
    m_expected = np.array([4, 5])
    assert (m == m_expected).all()
Example #3
def test_token_aligner_project_span():
    source_tokens = ["abc", "def", "ghi", "jkl"]
    target_tokens = ["abc", "d", "ef", "ghi", "jkl"]
    ta = TokenAligner(source_tokens, target_tokens)
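    # Source span [1, 2) covers "def", which was split into target tokens
    # "d" and "ef" (indices 1 and 2), so the projected span is [1, 3).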
    m = ta.project_span(1, 2)
    m_expected = np.array([1, 3])
    assert (m == m_expected).all()
Example #4
def test_project_span_covering_whole_sequence():
    src_tokens = ["Members", "of", "the", "House", "clapped", "their", "hands"]
    tgt_tokens = [
        "Members", "Ġof", "Ġthe", "ĠHouse", "Ġcl", "apped", "Ġtheir", "Ġhands"
    ]
    # reference: tgt_token_index = [[0], [1], [2], [3], [4, 5], [6], [7]]
    ta = TokenAligner(src_tokens, tgt_tokens)
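    # The full source span [0, 7) maps to the full target span [0, 8);
    # "clapped" splits into "Ġcl" + "apped", so the target side is one token longer.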
    assert (0, 8) == ta.project_span(0, 7)