예제 #1
0
def test_verify_bool():
    """A non-boolean ``stop_remove`` argument must raise ``TypeError``."""
    sample_text = "Wherefore art thou Romeo! Wherefore art thou Romeo. Wherefore art thou Romeo?"
    expected_message = (
        "stop_remove, remove_punctuation, remove_number and case_sensitive must be boolean"
    )

    # A string is passed where a boolean flag is expected.
    with pytest.raises(TypeError) as exc_info:
        text_grams(sample_text, stop_remove="text")

    assert str(exc_info.value) == expected_message
예제 #2
0
def test_verify_k():
    """Check ``k`` handling: ``k=0`` yields no rows, a negative ``k`` is rejected."""
    sample_text = "Wherefore art thou Romeo! Wherefore art thou Romeo. Wherefore art thou Romeo?"

    # With k=0 the returned dataframe is expected to be empty.
    empty_result = text_grams(sample_text, k=0)
    row_count = empty_result.shape[0]
    assert row_count == 0

    # A negative k must raise ValueError with the exact message below.
    with pytest.raises(ValueError) as exc_info:
        text_grams(sample_text, k=-1)
    assert str(exc_info.value) == "k must be 0 or greater"
예제 #3
0
def test_verify_n():
    """An empty ``n`` list or a non-positive entry must raise ``ValueError``."""
    sample_text = "Wherefore art thou Romeo! Wherefore art thou Romeo. Wherefore art thou Romeo?"

    # (invalid n, expected error message), checked in this order.
    invalid_cases = (
        ([], "n must have at least one positive value"),
        ([-1], "Values of n must be greater than 0"),
    )

    for bad_n, expected_message in invalid_cases:
        with pytest.raises(ValueError) as exc_info:
            text_grams(sample_text, n=bad_n)
        assert str(exc_info.value) == expected_message
예제 #4
0
def test_verify_input2():
    """
    Whitespace-only input must be rejected as empty with a ValueError
    """
    blank_text = " "
    expected_message = "Input text is empty."

    with pytest.raises(ValueError) as exc_info:
        text_grams(blank_text)

    assert str(exc_info.value) == expected_message
예제 #5
0
def test_verify_input1():
    """
    A numeric (non-string) input must be rejected with a ValueError
    """
    numeric_input = 100
    expected_message = "Input must be a string"

    with pytest.raises(ValueError) as exc_info:
        text_grams(numeric_input)

    assert str(exc_info.value) == expected_message
예제 #6
0
def test_normal_function():
    """Check the top bigram of a simple text, passing ``n`` as a list and as an int.

    The list-form call is compared against ``sample_out``, which is defined
    outside this function; the int-form call is compared against literal
    expected values.
    """
    ex = "Today is a sunny day. We should go to a beach on this sunny day"
    k = 1

    # Scenario 1: n given as a list of gram sizes.
    n = [2]
    ex_output = text_grams(ex, k, n)
    assert ex_output["2gram"][0] == sample_out['2gram'][0]
    # Each entry in the "2gram" column should have as many items as the gram size.
    assert len(ex_output['2gram'][0]) == n[0]
    assert ex_output.shape[0] == sample_out.shape[0]

    # Scenario 2: n given as a bare integer.
    n = 2
    ex_output = text_grams(ex, k, n)
    assert ex_output["2gram"][0] == ("sunny", "day")
    assert ex_output["Number of Instances"][0] == 2
예제 #7
0
def test_verify_output():
    """Check the result's container type and the types of its first-row values."""
    sample_text = "Today is a sunny day. We should go to a beach on this sunny day"
    top_k, gram_sizes = 1, [2]
    result = text_grams(sample_text, top_k, gram_sizes)

    # The result must be exactly the same type as the reference output.
    assert type(result) is type(sample_out)

    # The gram itself is stored as a tuple.
    first_gram = result['2gram'][0]
    assert type(first_gram) is tuple

    # The occurrence count is stored as a numpy 64-bit integer.
    first_count = result['Number of Instances'][0]
    assert type(first_count) is numpy.int64
예제 #8
0
def test_verify_n():
    """Invalid ``n`` values must raise the expected exception type and message."""
    sample_text = "Wherefore art thou Romeo! Wherefore art thou Romeo. Wherefore art thou Romeo?"

    # (expected exception, invalid n, expected error message), checked in this order.
    invalid_cases = (
        (TypeError, "text", "n must be an integer list"),
        (ValueError, [-1, -2], "Values of n must be greater than 0"),
        (ValueError, -1, "n must be 0 or greater"),
    )

    for expected_error, bad_n, expected_message in invalid_cases:
        with pytest.raises(expected_error) as exc_info:
            text_grams(sample_text, n=bad_n)
        assert str(exc_info.value) == expected_message
예제 #9
0
def test_verify_case_sensitive():
    """With ``case_sensitive=True`` the result must contain more than one row."""
    sample_text = "Hey Guys. Hey Guys. hey guys. hey guys."
    result = text_grams(sample_text, case_sensitive=True)

    # NOTE(review): presumably "Hey Guys" and "hey guys" are counted as
    # separate grams when case is respected, whereas a case-insensitive run
    # would yield only (hey, guys) -- confirm against text_grams.
    row_count = result.shape[0]
    assert row_count > 1
예제 #10
0
def test_verify_remove_number():
    """With ``remove_number=False`` the result must contain more than one row."""
    sample_text = "This is 123 a sentence. This is 123 also a sentence. This is 123 also a sentence."
    result = text_grams(sample_text, remove_number=False)

    # Per the original author's note: if numbers were removed, only
    # (also, sentence) would be considered, i.e. a single row.
    row_count = result.shape[0]
    assert row_count > 1
예제 #11
0
def test_verify_remove_punctuation():
    """With ``remove_punctuation=False`` the result must contain more than one row."""
    sample_text = "This is / a sentence. This is / also a sentence. This is / also a sentence."
    result = text_grams(sample_text, remove_punctuation=False)

    # Per the original author's note: if punctuation were removed, only
    # (also, sentence) would be considered, i.e. a single row.
    row_count = result.shape[0]
    assert row_count > 1
예제 #12
0
def test_verify_stop_remove():
    """With ``stop_remove=False`` the result must contain more than one row."""
    sample_text = "This is a sentence. This is also a sentence. This is also a sentence."
    result = text_grams(sample_text, stop_remove=False)

    # Per the original author's note: if stopwords were removed, only
    # (also, sentence) would be considered, i.e. a single row.
    row_count = result.shape[0]
    assert row_count > 1
예제 #13
0
def test_verify_sentence_endings():
    """With default options, the three-sentence sample must not yield exactly four rows."""
    sample_text = "Wherefore art thou Romeo! Wherefore art thou Romeo. Wherefore art thou Romeo?"
    result = text_grams(sample_text)

    # Per the original author's note: when sentences are split properly on
    # "!", "." and "?", (Romeo, Wherefore) is not a gram combination, so the
    # row count differs from 4.
    row_count = result.shape[0]
    assert row_count != 4
예제 #14
0
def test_verify_n_larger_than_sentence():
    """A gram size longer than every sentence must produce an empty dataframe."""
    sample_text = "short sentence. short sentence."
    oversized_n = [3]  # each sentence in the sample has only two words

    result = text_grams(sample_text, n=oversized_n)

    row_count = result.shape[0]
    assert row_count == 0