Example #1
0
def test_tokenize():
    """
    Check tokenize() on a short passage and that untokenize() reverses it.

    The first text is checked against an explicit token list and then
    round-tripped. The second text, which contains quotation marks and
    contractions, is only round-tripped when NLTK's major version is 3 or
    newer.
    """
    # a snippet from Hitchhiker's Guide that just happens to have
    # most of the examples of punctuation we're looking for.
    #
    # TODO: test wacky behavior with "n't" and "cannot" and stuff.
    text1 = "Time is an illusion. Lunchtime, doubly so."
    text2 = (
        '"Very deep," said Arthur, "you should send that in to the '
        "Reader's Digest. They've got a page for people like you.\""
    )
    eq_(tokenize(text1), ["Time", "is", "an", "illusion", ".", "Lunchtime", ",", "doubly", "so", "."])
    eq_(untokenize(tokenize(text1)), text1)
    # Compare the major version as an integer: comparing the raw version
    # strings is lexicographic, so a release such as "10.0" would sort
    # below "3" and this check would be skipped by mistake.
    nltk_major = int(nltk.__version__.split(".")[0])
    if nltk_major >= 3:
        eq_(untokenize(tokenize(text2)), text2)
Example #2
0
def test_tokenize():
    """
    Verify that tokenize() splits a sample sentence into the expected
    tokens and that untokenize() restores the original text.

    The round trip of the longer quoted passage is asserted only when the
    installed NLTK has a major version of at least 3.
    """
    # a snippet from Hitchhiker's Guide that just happens to have
    # most of the examples of punctuation we're looking for.
    #
    # TODO: test wacky behavior with "n't" and "cannot" and stuff.
    text1 = "Time is an illusion. Lunchtime, doubly so."
    text2 = ('"Very deep," said Arthur, "you should send that in to the '
             'Reader\'s Digest. They\'ve got a page for people like you."')
    eq_(tokenize(text1),
        ['Time', 'is', 'an', 'illusion', '.', 'Lunchtime', ',',
         'doubly', 'so', '.']
    )
    eq_(untokenize(tokenize(text1)), text1)
    # The version is a string; compare its leading component numerically
    # because plain string comparison orders '10.0' before '3'.
    major_version = int(nltk.__version__.split('.')[0])
    if major_version >= 3:
        eq_(untokenize(tokenize(text2)), text2)
Example #3
0
def test_tokenize():
    """
    Exercise tokenize() and untokenize() on two short passages.

    The first passage must tokenize to a known list and survive a
    tokenize/untokenize round trip. The second passage's round trip is
    checked only for NLTK major version 3 and above.
    """
    # a snippet from Hitchhiker's Guide that just happens to have
    # most of the examples of punctuation we're looking for.
    #
    # TODO: test wacky behavior with "n't" and "cannot" and stuff.
    text1 = "Time is an illusion. Lunchtime, doubly so."
    text2 = ('"Very deep," said Arthur, "you should send that in to the '
             'Reader\'s Digest. They\'ve got a page for people like you."')
    eq_(tokenize(text1), [
        'Time', 'is', 'an', 'illusion', '.', 'Lunchtime', ',', 'doubly', 'so',
        '.'
    ])
    eq_(untokenize(tokenize(text1)), text1)
    # Parse the major version before comparing: a string comparison such
    # as '10.0' >= '3' is False because it goes character by character.
    major = int(nltk.__version__.split('.')[0])
    if major >= 3:
        eq_(untokenize(tokenize(text2)), text2)
Example #4
0
def normalize(text):
    """
    Return the non-stopword word stems of the text joined back into a
    single string. The stems themselves come from normalize_list().
    """
    stems = normalize_list(text)
    return untokenize(stems)
Example #5
0
def normalize(text):
    """
    Build one string out of the text's non-stopword word stems.

    normalize_list() produces the stems; untokenize() joins them.
    """
    stem_tokens = normalize_list(text)
    normalized = untokenize(stem_tokens)
    return normalized