예제 #1
0
def test_step3():
    """Check ``PorterStemmer._step3`` with and without a row mask.

    Without a mask every input word is expected to map to its listed
    output.  With a mask, rows flagged ``False`` are expected to come back
    exactly as they went in.
    """
    # (input word, expected output) pairs, in the order they are fed in.
    cases = [
        ("triplicate", "triplic"),
        ("formative", "form"),
        ("formalize", "formal"),
        ("electriciti", "electric"),
        ("electriciti", "electric"),
        ("hopeful", "hope"),
        ("goodness", "good"),
    ]
    words = [word for word, _ in cases]
    stems = [stem for _, stem in cases]

    series = cudf.Series(words)
    stemmer = PorterStemmer()

    result = stemmer._step3(series)
    assert list(result.to_pandas().values) == stems

    # mask test: the last two rows are masked out, so the expectation for
    # them is the untouched input word.
    n_skipped = 2
    n_active = len(cases) - n_skipped
    masked_stems = stems[:n_active] + words[n_active:]
    row_mask = cudf.Series([True] * n_active + [False] * n_skipped)

    result = stemmer._step3(series, row_mask)
    assert list(result.to_pandas().values) == masked_stems
예제 #2
0
def test_step2():
    """Check ``PorterStemmer._step2`` with and without a row mask.

    The unmasked call is compared against the full list of expected
    outputs; the masked call expects the final three rows (mask ``False``)
    to be returned unchanged.
    """
    # Each entry pairs an input word with the output expected for it.
    cases = (
        ("relational", "relate"),
        ("conditional", "condition"),
        ("rational", "rational"),
        ("valenci", "valence"),
        ("hesitanci", "hesitance"),
        ("digitizer", "digitize"),
        ("conformabli", "conformable"),
        ("radicalli", "radical"),
        ("differentli", "different"),
        ("vileli", "vile"),
        ("analogousli", "analogous"),
        ("vietnamization", "vietnamize"),
        ("predication", "predicate"),
        ("operator", "operate"),
        ("feudalism", "feudal"),
        ("decisiveness", "decisive"),
        ("hopefulness", "hopeful"),
        ("callousness", "callous"),
        ("formaliti", "formal"),
        ("sensitiviti", "sensitive"),
        ("sensibiliti", "sensible"),
    )
    inputs, outputs = (list(column) for column in zip(*cases))

    input_ser = cudf.Series(inputs)
    stemmer = PorterStemmer()

    stemmed = stemmer._step2(input_ser)
    assert list(stemmed.to_pandas().values) == outputs

    # mask test: the trailing three rows are excluded by the mask, so their
    # expected values are the original input words.
    tail = 3
    outputs_with_mask = outputs[:-tail] + inputs[-tail:]
    flags = [True] * (len(outputs_with_mask) - tail) + [False] * tail

    stemmed = stemmer._step2(input_ser, cudf.Series(flags))
    assert list(stemmed.to_pandas().values) == outputs_with_mask
예제 #3
0
def test_step4():
    """Check ``PorterStemmer._step4`` with and without a row mask.

    First verifies the output for every input word, then repeats the call
    with a mask whose last two entries are ``False`` and expects those two
    rows to be left as-is.
    """
    # (input word, expected output) pairs.
    cases = [
        ("revival", "reviv"),
        ("allowance", "allow"),
        ("inference", "infer"),
        ("airliner", "airlin"),
        ("gyroscopic", "gyroscop"),
        ("adjustable", "adjust"),
        ("defensible", "defens"),
        ("irritant", "irrit"),
        ("replacement", "replac"),
        ("adjustment", "adjust"),
        ("dependent", "depend"),
        ("adoption", "adopt"),
        ("homologou", "homolog"),
        ("communism", "commun"),
        ("activate", "activ"),
        ("angulariti", "angular"),
        ("homologous", "homolog"),
        ("effective", "effect"),
        ("bowdlerize", "bowdler"),
    ]
    source_words = [pair[0] for pair in cases]
    wanted = [pair[1] for pair in cases]

    source_ser = cudf.Series(source_words)
    stemmer = PorterStemmer()

    actual = stemmer._step4(source_ser)
    assert list(actual.to_pandas().values) == wanted

    # mask test: only rows before the cutoff are active; the remaining two
    # are expected to keep their input value.
    cutoff = len(cases) - 2
    wanted_masked = wanted[:cutoff] + source_words[cutoff:]
    active = cudf.Series([idx < cutoff for idx in range(len(cases))])

    actual = stemmer._step4(source_ser, active)
    assert list(actual.to_pandas().values) == wanted_masked
예제 #4
0
def test_step1c():
    """Check ``PorterStemmer._step1c`` with and without a row mask.

    Each row lists the input word, the output expected from an unmasked
    call, and whether the row is active in the masked call.  Inactive rows
    are expected to be returned unchanged.
    """
    # (input word, expected unmasked output, active in the mask test)
    rows = [
        ("happy", "happi", True),
        ("sky", "ski", False),
        ("enjoy", "enjoy", False),
        ("boy", "boy", False),
        ("toy", "toy", False),
        ("y", "y", True),
    ]
    series = cudf.Series([word for word, _, _ in rows])
    stemmer = PorterStemmer()

    result = stemmer._step1c(series)
    unmasked_expect = [stem for _, stem, _ in rows]
    assert list(result.to_pandas().values) == unmasked_expect

    # mask test: an inactive row keeps its input word.
    masked_expect = [stem if active else word for word, stem, active in rows]
    row_mask = cudf.Series([active for _, _, active in rows])
    result = stemmer._step1c(series, row_mask)
    assert list(result.to_pandas().values) == masked_expect
예제 #5
0
def test_step1b():
    """Check ``PorterStemmer._step1b`` with and without a row mask.

    The unmasked call must reproduce the expected output for every word.
    In the masked call the final three rows carry a ``False`` mask and are
    expected to come back unchanged.
    """
    # (input word, expected output) pairs.
    cases = [
        ("feed", "feed"),
        ("agreed", "agree"),
        ("plastered", "plaster"),
        ("bled", "bled"),
        ("motoring", "motor"),
        ("sing", "sing"),
        ("conflated", "conflate"),
        ("troubled", "trouble"),
        ("sized", "size"),
        ("hopping", "hop"),
        ("tanned", "tan"),
        ("falling", "fall"),
        ("hissing", "hiss"),
        ("fizzed", "fizz"),
        ("failing", "fail"),
        ("filing", "file"),
    ]
    raw_words = []
    stemmed_words = []
    for raw, stemmed in cases:
        raw_words.append(raw)
        stemmed_words.append(stemmed)

    word_ser = cudf.Series(raw_words)
    stemmer = PorterStemmer()
    output = stemmer._step1b(word_ser)

    assert list(output.to_pandas().values) == stemmed_words

    # mask test: the three trailing rows are switched off by the mask and
    # therefore expected to equal their input.
    n_off = 3
    n_on = len(cases) - n_off
    expected_masked = stemmed_words[:n_on] + raw_words[n_on:]
    on_off = cudf.Series([True] * n_on + [False] * n_off)
    output = stemmer._step1b(word_ser, on_off)
    assert list(output.to_pandas().values) == expected_masked
예제 #6
0
def test_step5b():
    """Check ``PorterStemmer._step5b`` with and without a row mask.

    Rows give the input word, the output expected without a mask, and the
    mask flag used in the second call.  A row whose flag is ``False`` is
    expected to keep its input value.
    """
    # (input word, expected unmasked output, mask flag)
    rows = (
        ("controll", "control", False),
        ("roll", "roll", True),
    )
    inputs = [row[0] for row in rows]
    series = cudf.Series(inputs)
    plain_expect = [row[1] for row in rows]

    stemmer = PorterStemmer()
    result = stemmer._step5b(series)
    assert list(result.to_pandas().values) == plain_expect

    # mask test: fall back to the input word wherever the flag is False.
    masked_expect = [out if flag else word for word, out, flag in rows]
    flags = cudf.Series([row[2] for row in rows])
    result = stemmer._step5b(series, flags)
    assert list(result.to_pandas().values) == masked_expect
예제 #7
0
def test_step5a():
    """Check ``PorterStemmer._step5a`` with and without a row mask.

    The first call is compared against the expected output of every word.
    The second call masks out the last two rows and expects them to be
    returned unchanged.
    """
    # (input word, expected output) pairs.
    cases = [
        ("probate", "probat"),
        ("rate", "rate"),
        ("cease", "ceas"),
        ("ones", "ones"),
    ]
    words = [word for word, _ in cases]
    series = cudf.Series(words)

    stems = [stem for _, stem in cases]
    stemmer = PorterStemmer()
    result = stemmer._step5a(series)
    observed = list(result.to_pandas().values)
    assert observed == stems

    # mask test: the final two rows are inactive and keep their input.
    inactive = 2
    masked_stems = stems[:-inactive] + words[-inactive:]
    flags = [True] * (len(masked_stems) - inactive) + [False] * inactive
    result = stemmer._step5a(series, cudf.Series(flags))
    observed = list(result.to_pandas().values)
    assert observed == masked_stems
예제 #8
0
def test_step1a():
    """Check ``PorterStemmer._step1a`` with and without a row mask.

    Each row lists the input word, the output expected from an unmasked
    call, and the mask flag for the masked call.  Rows with a ``False``
    flag are expected to be returned unchanged.
    """
    # (input word, expected unmasked output, mask flag)
    rows = [
        ("caresses", "caress", True),
        ("ponies", "poni", False),
        ("ties", "tie", True),
        ("caress", "caress", True),
        ("cats", "cat", False),
    ]
    series = cudf.Series([word for word, _, _ in rows])

    stemmer = PorterStemmer()
    result = stemmer._step1a(series)

    plain_expect = [stem for _, stem, _ in rows]
    assert list(result.to_pandas().values) == plain_expect

    # mask test: rows flagged False keep their input word.
    row_mask = cudf.Series([flag for _, _, flag in rows])
    masked_expect = [stem if flag else word for word, stem, flag in rows]
    result = stemmer._step1a(series, row_mask)

    assert list(result.to_pandas().values) == masked_expect