def test_sentences():
    """text2sentences emits one sentence per output line."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        infile = 'in.txt'
        text = 'Hello World! I love go.'
        expected = ['Hello World!', 'I love go.']
        create_single_output(infile, text)
        result = cli.invoke(text2sentences, [infile])
        lines = result.output.split('\n')
        assert result.exit_code == 0
        compare_results(lines, expected)
def test_text2words():
    """text2words splits raw text into one word/punctuation token per line."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        infile = 'in.txt'
        text = 'Hello World!\nI.\nnot sure where to go'
        expected = ['Hello', 'World', '!', 'I.', 'not', 'sure', 'where', 'to', 'go']
        create_single_output(infile, text)
        result = cli.invoke(text2words, [infile])
        lines = result.output.split('\n')
        assert result.exit_code == 0
        compare_results(lines, expected)
def test_filterpunc():
    """filterpunc drops punctuation-only tokens from the token stream."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        infile = 'in.txt'
        tokens_in = 'Hello\nWorld\n!\nI\n.\nnot'
        expected = ['Hello', 'World', 'I', 'not']
        create_single_output(infile, tokens_in)
        result = cli.invoke(filterpunc, [infile])
        lines = result.output.split('\n')
        assert result.exit_code == 0
        compare_results(lines, expected)
def test_punc():
    """text2punc keeps only the punctuation tokens."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        infile = 'in.txt'
        tokens_in = 'Hello\nWorld\n!\nI\nlove,\ngo\n.'
        expected = ['!', ',', '.']
        create_single_output(infile, tokens_in)
        result = cli.invoke(text2punc, [infile])
        lines = result.output.split('\n')
        assert result.exit_code == 0
        compare_results(lines, expected)
def test_words2ngrams():
    """words2ngrams -n 3 emits space-joined trigrams of the token stream."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        filename = 'in.txt'
        sentence = 'Hello\nWorld\n!\nI\nlove\ngo\n.'
        expected_tokens = ['Hello World !', 'World ! I', '! I love', 'I love go']
        create_single_output(filename, sentence)
        # Pass the option value as a string: CliRunner.invoke expects
        # argv-style string arguments, and an int here breaks option
        # parsing under newer click releases.
        result = runner.invoke(words2ngrams, ['-n', '3', filename])
        tokens = result.output.split('\n')
        assert result.exit_code == 0
        compare_results(tokens, expected_tokens)
def test_filterwords():
    """filterwords removes English stopwords, keeping content words and punctuation."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        infile = 'in.txt'
        tokens_in = 'Hello\nWorld\n!\nI\nam\nnot\na\ncrook\n.'
        expected = ['Hello', 'World', '!', 'crook', '.']
        create_single_output(infile, tokens_in)
        result = cli.invoke(filterwords, ['--language', 'english', infile])
        lines = result.output.split('\n')
        assert result.exit_code == 0
        compare_results(lines, expected)
def test_top_bigrams():
    """tokens2topbigrams ranks bigrams; 'love you' (seen twice) must come first."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        infile = 'in.txt'
        tokens_in = 'I\nworld\n!\nI\nlove\nyou\nthis\nworld\nand\nlove\nyou'
        create_single_output(infile, tokens_in)
        result = cli.invoke(tokens2topbigrams, [infile])
        assert result.exit_code == 0
        first_row = result.output.split('\n')[0]
        # First two CSV fields of the top-ranked row are the bigram itself.
        assert first_row.split(',')[0:2] == ['love', 'you']
def test_uppercase():
    """tokens2upper upper-cases every token; punctuation passes through unchanged."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        infile = 'in.txt'
        tokens_in = 'Hello\nWorld\n!\nI\n.\nnoooo\n'
        expected = ['HELLO', 'WORLD', '!', 'I', '.', 'NOOOO']
        create_single_output(infile, tokens_in)
        result = cli.invoke(tokens2upper, [infile])
        lines = result.output.split('\n')
        assert result.exit_code == 0
        compare_results(lines, expected)
def test_pos_tokens():
    """tokens2pos tags each token with its part of speech as 'token,TAG'."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        infile = 'in.txt'
        tokens_in = 'Hello\nworld\n!\nI\nlove\nthis\nworld\nand\nlove\nyou'
        expected = [
            'Hello,NNP', 'world,NN', '!,.', 'I,PRP', 'love,VBP',
            'this,DT', 'world,NN', 'and,CC', 'love,VB', 'you,PRP',
        ]
        create_single_output(infile, tokens_in)
        result = cli.invoke(tokens2pos, [infile])
        lines = result.output.split('\n')
        assert result.exit_code == 0
        compare_results(lines, expected)
def test_nonewlines():
    """nonewlines collapses the whole file onto a single space-joined line."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        filename = 'in.txt'
        sentence = 'Hello\nWorld\n!\nI\nam\nin.\n'
        expected_tokens = ['Hello World ! I am in.']
        create_single_output(filename, sentence)
        result = runner.invoke(nonewlines, [filename])
        tokens = result.output.split('\n')
        assert result.exit_code == 0
        # One content line plus a trailing newline -> exactly two split parts.
        # Reuse the already-split output instead of splitting a second time.
        assert len(tokens) == 2
        compare_results(tokens, expected_tokens)
def test_pos_tokens_2():
    """tokens2pos tags each token with its part of speech as 'token,TAG'.

    NOTE(review): the module originally defined two functions named
    test_pos_tokens; the second definition silently shadowed the first,
    so pytest collected only one of them. Renamed with a suffix so both
    are collected and run.
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        filename = 'in.txt'
        sentence = 'Hello\nworld\n!\nI\nlove\nthis\nworld\nand\nlove\nyou'
        expected_tokens = [
            'Hello,NNP', 'world,NN', '!,.', 'I,PRP', 'love,VBP',
            'this,DT', 'world,NN', 'and,CC', 'love,VB', 'you,PRP',
        ]
        create_single_output(filename, sentence)
        result = runner.invoke(tokens2pos, [filename])
        tokens = result.output.split('\n')
        assert result.exit_code == 0
        compare_results(tokens, expected_tokens)
def test_count_tokens():
    """tokens2counts emits 'token,count' rows (order-independent check via sorting)."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        infile = 'in.txt'
        tokens_in = 'Hello,\nworld\n!\nI\nlove\nthis\nworld\nand\nlove\nyou'
        expected = sorted([
            'love,2', 'world,2', 'and,1', 'I,1', 'you,1',
            'this,1', '"Hello,",1', '!,1', '',
        ])
        create_single_output(infile, tokens_in)
        result = cli.invoke(tokens2counts, [infile])
        lines = sorted(result.output.split('\n'))
        assert result.exit_code == 0
        compare_results(lines, expected)
def test_count_tokens_2():
    """tokens2counts emits 'token,count' rows; compared sorted, order-independent.

    NOTE(review): the module originally defined two functions named
    test_count_tokens; the second definition silently shadowed the first,
    so pytest collected only one of them. Renamed with a suffix so both
    are collected and run.
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        filename = 'in.txt'
        sentence = 'Hello,\nworld\n!\nI\nlove\nthis\nworld\nand\nlove\nyou'
        expected_tokens = [
            'love,2', 'world,2', 'and,1', 'I,1', 'you,1',
            'this,1', '"Hello,",1', '!,1', '',
        ]
        expected_tokens.sort()
        create_single_output(filename, sentence)
        result = runner.invoke(tokens2counts, [filename])
        tokens = result.output.split('\n')
        tokens.sort()
        assert result.exit_code == 0
        compare_results(tokens, expected_tokens)
def test_filterwords_custom():
    """filterwords --custom also drops words from a user-supplied stopword file."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        infile = 'in.txt'
        tokens_in = 'Hello\nWorld\n!\nI\nam\nnot\na\ncrook\n.'
        expected = ['World', '!', 'crook', '.']
        stopword_file = 'custom.txt'
        create_single_output(infile, tokens_in)
        # 'hello' in the custom list filters 'Hello' from the output.
        create_single_output(stopword_file, 'hello\n')
        result = cli.invoke(filterwords, ['--custom', 'custom.txt', infile])
        lines = result.output.split('\n')
        assert result.exit_code == 0
        compare_results(lines, expected)
def test_filterwords_custom_2():
    """filterwords --custom also drops words from a user-supplied stopword file.

    NOTE(review): the module originally defined two functions named
    test_filterwords_custom; the second definition silently shadowed the
    first, so pytest collected only one of them. Renamed with a suffix
    so both are collected and run.
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        filename = 'in.txt'
        sentence = 'Hello\nWorld\n!\nI\nam\nnot\na\ncrook\n.'
        expected_tokens = ['World', '!', 'crook', '.']
        custom_stopword_filename = 'custom.txt'
        custom_stopwords = 'hello\n'
        create_single_output(filename, sentence)
        create_single_output(custom_stopword_filename, custom_stopwords)
        result = runner.invoke(filterwords, ['--custom', 'custom.txt', filename])
        tokens = result.output.split('\n')
        assert result.exit_code == 0
        compare_results(tokens, expected_tokens)
def test_filterlengths():
    """filterlengths drops tokens shorter than the minimum length (default 3)."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        infile = 'in.txt'
        tokens_in = 'Hello\nWorld\n!\nI\n.\nnot\nwin\n'
        create_single_output(infile, tokens_in)

        # Default minimum length of 3 keeps three-letter words and longer.
        result = cli.invoke(filterlengths, [infile])
        lines = result.output.split('\n')
        assert result.exit_code == 0
        compare_results(lines, ['Hello', 'World', 'not', 'win'])

        # Raising the minimum to 4 filters out the three-letter words too.
        result = cli.invoke(filterlengths, ['-m', '4', infile])
        lines = result.output.split('\n')
        assert result.exit_code == 0
        compare_results(lines, ['Hello', 'World'])