Exemple #1
0
def test_token_count():
    """Verify nvtext.token_count for default and custom delimiters.

    Three call shapes are checked against the same device strings:
    no delimiter argument, a single-character delimiter, and writing
    the counts into a preallocated int32 device array via ``devptr``.
    The null entry and the empty string are both expected to report
    zero tokens.
    """
    sentences = [
        "the quick brown fox jumped over the lazy brown dog",
        "the sable siamésé cat jumped under the brown sofa",
        None,
        "",
    ]
    device_strs = nvstrings.to_device(sentences)

    # No delimiter argument: tokens are split on the default (space).
    default_counts = [10, 9, 0, 0]
    assert nvtext.token_count(device_strs) == default_counts

    # Splitting on the letter 'o' instead.
    o_counts = [6, 3, 0, 0]
    assert nvtext.token_count(device_strs, delimiter='o') == o_counts

    # Results delivered through a raw device pointer rather than a
    # returned list; copy back to host to compare.
    counts_buffer = rmm.device_array(device_strs.size(), dtype=np.int32)
    buffer_address = counts_buffer.device_ctypes_pointer.value
    nvtext.token_count(device_strs, devptr=buffer_address)
    assert np.array_equal(counts_buffer.copy_to_host(), [10, 9, 0, 0])
Exemple #2
0
def test_token_count():
    """Verify nvtext.token_count across delimiter forms and output modes.

    Covered cases: no delimiter argument, a single-character delimiter,
    a list of delimiters, an empty delimiter list (expected to match
    the no-argument result), and both of the list-returning and
    ``devptr``-writing output paths. The null entry and the empty
    string are expected to report zero tokens in every case.
    """
    samples = [
        "the quick brown fox jumped over the lazy brown dog",
        "the sable siamésé cat jumped under the brown sofa",
        None,
        "",
        "test_str\x01test_str\x02test_str\x03test_str\x04test_str\x05",
    ]
    device_strs = nvstrings.to_device(samples)
    row_count = device_strs.size()

    # No delimiter argument: tokens are split on the default (space).
    default_counts = [10, 9, 0, 0, 5]
    assert nvtext.token_count(device_strs) == default_counts

    # Single custom delimiter.
    o_counts = [6, 3, 0, 0, 1]
    assert nvtext.token_count(device_strs, delimiter="o") == o_counts

    # Default delimiter, counts written through a device pointer.
    default_buffer = rmm.device_array(row_count, dtype=np.int32)
    default_address = default_buffer.device_ctypes_pointer.value
    nvtext.token_count(device_strs, devptr=default_address)
    assert np.array_equal(default_buffer.copy_to_host(), [10, 9, 0, 0, 5])

    # Several delimiters supplied as a list.
    five_vowels = ["a", "e", "i", "o", "u"]
    five_vowel_counts = [14, 15, 0, 0, 6]
    assert (
        nvtext.token_count(device_strs, delimiter=five_vowels)
        == five_vowel_counts
    )

    # An empty delimiter list is expected to match the default result.
    assert nvtext.token_count(device_strs, delimiter=[]) == [10, 9, 0, 0, 5]

    # Delimiter list combined with device-pointer output.
    four_vowels = ["a", "e", "i", "o"]
    vowel_buffer = rmm.device_array(row_count, dtype=np.int32)
    vowel_address = vowel_buffer.device_ctypes_pointer.value
    nvtext.token_count(
        device_strs,
        delimiter=four_vowels,
        devptr=vowel_address,
    )
    four_vowel_counts = [12, 13, 0, 0, 6]
    assert np.array_equal(vowel_buffer.copy_to_host(), four_vowel_counts)