예제 #1
0
def test_large_whitespace():
    """Expect a run of two spaces to come back as one whitespace token."""
    actual = list(tokenize("Massive gap.  Here."))
    # (text, kind) pairs in the order the tokens are expected to appear.
    pairs = (
        ('Massive', 'word'),
        (' ', 'whitespace'),
        ('gap', 'word'),
        ('.', 'punctuation'),
        ('  ', 'whitespace'),
        ('Here', 'word'),
        ('.', 'punctuation'),
    )
    expected = [Token(text, kind) for text, kind in pairs]
    assert actual == expected
예제 #2
0
def test_two_words():
    """Expect two words split by a single space to give word/whitespace/word."""
    actual = list(tokenize("Hello World"))
    expected = [
        Token("Hello", "word"),
        Token(" ", "whitespace"),
        Token("World", "word"),
    ]
    assert actual == expected
예제 #3
0
def test_punctuation_then_word():
    """Expect leading punctuation to be split off from the word after it."""
    actual = list(tokenize("!word"))
    expected = [
        Token('!', 'punctuation'),
        Token('word', 'word'),
    ]
    assert actual == expected
예제 #4
0
def test_digit_and_punctuation():
    """Expect colon-separated numbers to alternate digit and punctuation tokens."""
    actual = list(tokenize("1:02:08"))
    # (text, kind) pairs in the order the tokens are expected to appear.
    pairs = (
        ('1', 'digit'),
        (':', 'punctuation'),
        ('02', 'digit'),
        (':', 'punctuation'),
        ('08', 'digit'),
    )
    expected = [Token(text, kind) for text, kind in pairs]
    assert actual == expected
예제 #5
0
def test_whitespace_with_punctuation():
    """Expect adjacent dots and adjacent spaces to each group into one token."""
    actual = list(tokenize(".  ..  ."))
    # (text, kind) pairs in the order the tokens are expected to appear.
    pairs = (
        ('.', 'punctuation'),
        ('  ', 'whitespace'),
        ('..', 'punctuation'),
        ('  ', 'whitespace'),
        ('.', 'punctuation'),
    )
    expected = [Token(text, kind) for text, kind in pairs]
    assert actual == expected
예제 #6
0
def test_hybrid_number_with_word():
    """Expect a digit-led alphanumeric run ("7zip") to be a single word token."""
    actual = list(tokenize("7zip"))
    expected = [Token("7zip", "word")]
    assert actual == expected
예제 #7
0
def test_two_numbers():
    """Expect two numbers split by a space to give digit/whitespace/digit."""
    actual = list(tokenize("1 22"))
    expected = [
        Token("1", "digit"),
        Token(" ", "whitespace"),
        Token("22", "digit"),
    ]
    assert actual == expected
예제 #8
0
def test_single_word():
    """Expect a lone word to produce exactly one word token."""
    actual = list(tokenize("Hello"))
    expected = [Token("Hello", "word")]
    assert actual == expected
예제 #9
0
def test_number_then_punctuation():
    """Expect a number followed by "!!" to give one digit and one punctuation token."""
    actual = list(tokenize("125!!"))
    expected = [
        Token("125", "digit"),
        Token("!!", "punctuation"),
    ]
    assert actual == expected
예제 #10
0
def test_number():
    """Expect a multi-digit number to produce exactly one digit token."""
    actual = list(tokenize("125"))
    expected = [Token("125", "digit")]
    assert actual == expected
예제 #11
0
def test_hybrid_word_with_number():
    """Expect a letter-led alphanumeric run ("Area51") to stay one word token."""
    actual = list(tokenize("Area51 blah"))
    expected = [
        Token("Area51", "word"),
        Token(" ", "whitespace"),
        Token("blah", "word"),
    ]
    assert actual == expected
예제 #12
0
def test_hypenated_word():
    """Expect a hyphen-joined word to be kept together as one word token."""
    actual = list(tokenize("Hello-world"))
    expected = [Token("Hello-world", "word")]
    assert actual == expected
예제 #13
0
def test_word_then_punctuation():
    """Expect a trailing full stop to be split off from the word before it."""
    actual = list(tokenize("Hello."))
    expected = [
        Token("Hello", "word"),
        Token(".", "punctuation"),
    ]
    assert actual == expected