Example #1
0
def test_replace():
    """Verify the word replacements applied by ``decontract`` and ``tokenize``.

    Each case pairs a raw input with the string ``Tokenizer.decontract`` is
    expected to return. ``Tokenizer.tokenize`` is then expected to return
    that same replacement as a list of tokens.
    """
    tokenizer = Tokenizer()

    # (raw input, expected decontracted string)
    cases = (
        ("hey gimme", 'hey give me'),
        ("hey let's", 'hey let_us'),
        ("hey wanna go", 'hey want to go'),
        ("hey gotta go", 'hey got to go'),
        ("hey gonna go", 'hey going to go'),
        ("hey cannot go", 'hey can not go'),
        ("lookit here", 'look at here'),
    )

    # String-level replacement first, for every case.
    for raw, replaced in cases:
        assert tokenizer.decontract(raw) == replaced

    # For these inputs the expected tokens are exactly the
    # whitespace-separated words of the decontracted string.
    for raw, replaced in cases:
        assert tokenizer.tokenize(raw) == replaced.split()