Example #1
0
def test_sentences():
    """text2sentences should emit one sentence per output line."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        input_path = 'in.txt'
        text = 'Hello World! I love go.'
        expected = ['Hello World!', 'I love go.']
        create_single_output(input_path, text)

        outcome = runner.invoke(text2sentences, [input_path])
        assert outcome.exit_code == 0
        compare_results(outcome.output.split('\n'), expected)
Example #2
0
def test_text2words():
    """text2words should tokenize raw text into one word per line."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        input_path = 'in.txt'
        text = 'Hello World!\nI.\nnot sure where to go'
        # Note: "I." survives as a single token while "!" is split off.
        expected = [
            'Hello', 'World', '!', 'I.', 'not', 'sure', 'where', 'to', 'go'
        ]
        create_single_output(input_path, text)

        outcome = runner.invoke(text2words, [input_path])
        assert outcome.exit_code == 0
        compare_results(outcome.output.split('\n'), expected)
Example #3
0
def test_filterpunc():
    """filterpunc should drop punctuation-only tokens from the stream."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        input_path = 'in.txt'
        create_single_output(input_path, 'Hello\nWorld\n!\nI\n.\nnot')

        outcome = runner.invoke(filterpunc, [input_path])
        assert outcome.exit_code == 0
        # "!" and "." are filtered out; word tokens pass through.
        compare_results(outcome.output.split('\n'),
                        ['Hello', 'World', 'I', 'not'])
Example #4
0
def test_sentences():
    """text2sentences splits a text file at sentence boundaries."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        source = 'in.txt'
        create_single_output(source, 'Hello World! I love go.')

        res = cli.invoke(text2sentences, [source])
        lines = res.output.split('\n')
        assert res.exit_code == 0
        compare_results(lines, ['Hello World!', 'I love go.'])
Example #5
0
def test_punc():
    """text2punc should keep only the punctuation tokens."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        input_path = 'in.txt'
        create_single_output(input_path, 'Hello\nWorld\n!\nI\nlove,\ngo\n.')

        outcome = runner.invoke(text2punc, [input_path])
        assert outcome.exit_code == 0
        # Only the punctuation marks survive, one per line.
        compare_results(outcome.output.split('\n'), ['!', ',', '.'])
Example #6
0
def test_filterpunc():
    """filterpunc removes tokens that are pure punctuation."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        source = 'in.txt'
        tokens_in = 'Hello\nWorld\n!\nI\n.\nnot'
        wanted = ['Hello', 'World', 'I', 'not']
        create_single_output(source, tokens_in)

        res = cli.invoke(filterpunc, [source])
        produced = res.output.split('\n')
        assert res.exit_code == 0
        compare_results(produced, wanted)
Example #7
0
def test_words2ngrams():
    """words2ngrams with -n 3 should emit every sliding 3-gram window.

    Given six word tokens plus punctuation, four overlapping trigram
    lines are expected on stdout.
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        filename = 'in.txt'
        sentence = 'Hello\nWorld\n!\nI\nlove\ngo\n.'
        expected_tokens = ['Hello World !', 'World ! I', '! I love', 'I love go']
        create_single_output(filename, sentence)
        # Pass the option value as a string: CliRunner.invoke forwards args
        # to click's parser, which expects shell-style string arguments
        # (consistent with the '-m', '4' usage in test_filterlengths).
        result = runner.invoke(words2ngrams, ['-n', '3', filename])
        tokens = result.output.split('\n')
        assert result.exit_code == 0
        compare_results(tokens, expected_tokens)
Example #8
0
def test_filterwords():
    """filterwords drops English stopwords, keeping content words and punctuation."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        input_path = 'in.txt'
        create_single_output(input_path,
                             'Hello\nWorld\n!\nI\nam\nnot\na\ncrook\n.')

        outcome = runner.invoke(filterwords,
                                ['--language', 'english', input_path])
        assert outcome.exit_code == 0
        # "I", "am", "not", "a" are stopwords; "Hello" survives case-insensitively? No —
        # the command keeps it, so it is asserted here as observed output.
        compare_results(outcome.output.split('\n'),
                        ['Hello', 'World', '!', 'crook', '.'])
Example #9
0
def test_top_bigrams():
    """tokens2topbigrams should rank ('love', 'you') as the top bigram."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        input_path = 'in.txt'
        tokens_in = 'I\nworld\n!\nI\nlove\nyou\nthis\nworld\nand\nlove\nyou'
        create_single_output(input_path, tokens_in)

        outcome = runner.invoke(tokens2topbigrams, [input_path])
        assert outcome.exit_code == 0

        # The first CSV row's first two fields name the top-scoring bigram.
        first_row = outcome.output.split('\n')[0]
        assert first_row.split(',')[0:2] == ['love', 'you']
Example #10
0
def test_uppercase():
    """tokens2upper should uppercase every token, leaving punctuation alone."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        input_path = 'in.txt'
        create_single_output(input_path, 'Hello\nWorld\n!\nI\n.\nnoooo\n')

        outcome = runner.invoke(tokens2upper, [input_path])
        assert outcome.exit_code == 0
        compare_results(outcome.output.split('\n'),
                        ['HELLO', 'WORLD', '!', 'I', '.', 'NOOOO'])
Example #11
0
def test_top_bigrams():
    """The most frequent bigram ('love you' appears twice) comes first."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        source = 'in.txt'
        words = 'I\nworld\n!\nI\nlove\nyou\nthis\nworld\nand\nlove\nyou'
        create_single_output(source, words)

        res = cli.invoke(tokens2topbigrams, [source])
        assert res.exit_code == 0

        rows = res.output.split('\n')
        # Leading two CSV columns of the top row are the bigram itself.
        assert rows[0].split(',')[0:2] == ['love', 'you']
Example #12
0
def test_uppercase():
    """tokens2upper maps each input token to its uppercase form."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        source = 'in.txt'
        tokens_in = 'Hello\nWorld\n!\nI\n.\nnoooo\n'
        wanted = ['HELLO', 'WORLD', '!', 'I', '.', 'NOOOO']
        create_single_output(source, tokens_in)

        res = cli.invoke(tokens2upper, [source])
        produced = res.output.split('\n')
        assert res.exit_code == 0
        compare_results(produced, wanted)
Example #13
0
def test_pos_tokens():
    """tokens2pos should tag each token with its part of speech as CSV."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        input_path = 'in.txt'
        create_single_output(
            input_path,
            'Hello\nworld\n!\nI\nlove\nthis\nworld\nand\nlove\nyou')

        outcome = runner.invoke(tokens2pos, [input_path])
        assert outcome.exit_code == 0
        # One "token,TAG" line per input token; note the second "love"
        # is tagged VB (base form) rather than VBP, by position.
        compare_results(outcome.output.split('\n'),
                        ['Hello,NNP', 'world,NN', '!,.',
                         'I,PRP', 'love,VBP', 'this,DT',
                         'world,NN', 'and,CC', 'love,VB', 'you,PRP'])
Example #14
0
def test_filterwords():
    """filterwords with an explicit language removes English stopwords."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        source = 'in.txt'
        tokens_in = 'Hello\nWorld\n!\nI\nam\nnot\na\ncrook\n.'
        wanted = ['Hello', 'World', '!', 'crook', '.']
        create_single_output(source, tokens_in)

        res = cli.invoke(filterwords, ['--language', 'english', source])
        produced = res.output.split('\n')
        assert res.exit_code == 0
        compare_results(produced, wanted)
Example #15
0
def test_nonewlines():
    """nonewlines should join all token lines into a single space-separated line."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        input_path = 'in.txt'
        create_single_output(input_path, 'Hello\nWorld\n!\nI\nam\nin.\n')

        outcome = runner.invoke(nonewlines, [input_path])
        produced = outcome.output.split('\n')
        assert outcome.exit_code == 0
        # Exactly one content line plus the trailing newline's empty split.
        assert len(outcome.output.split('\n')) == 2
        compare_results(produced, ['Hello World ! I am in.'])
Example #16
0
def test_pos_tokens():
    """Each token should come back as a "token,POS-tag" CSV line."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        source = 'in.txt'
        words = 'Hello\nworld\n!\nI\nlove\nthis\nworld\nand\nlove\nyou'
        wanted = [
            'Hello,NNP', 'world,NN', '!,.', 'I,PRP', 'love,VBP', 'this,DT',
            'world,NN', 'and,CC', 'love,VB', 'you,PRP'
        ]
        create_single_output(source, words)

        res = cli.invoke(tokens2pos, [source])
        produced = res.output.split('\n')
        assert res.exit_code == 0
        compare_results(produced, wanted)
Example #17
0
def test_count_tokens():
    """tokens2counts should emit "token,count" CSV lines (order-insensitive)."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        input_path = 'in.txt'
        create_single_output(
            input_path,
            'Hello,\nworld\n!\nI\nlove\nthis\nworld\nand\nlove\nyou')

        outcome = runner.invoke(tokens2counts, [input_path])
        assert outcome.exit_code == 0

        # Output order is not guaranteed, so both sides are sorted before
        # comparing. "Hello," contains a comma, hence the CSV quoting.
        expected = sorted(['love,2', 'world,2', 'and,1', 'I,1', 'you,1',
                           'this,1', '\"Hello,\",1', '!,1', ''])
        produced = sorted(outcome.output.split('\n'))
        compare_results(produced, expected)
Example #18
0
def test_count_tokens():
    """Duplicate tokens ("love", "world") should count 2; all others 1."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        source = 'in.txt'
        words = 'Hello,\nworld\n!\nI\nlove\nthis\nworld\nand\nlove\nyou'
        create_single_output(source, words)

        res = cli.invoke(tokens2counts, [source])
        assert res.exit_code == 0

        # Sort both lists so the assertion ignores emission order; the
        # token "Hello," is CSV-quoted because it embeds a comma.
        wanted = [
            'love,2', 'world,2', 'and,1', 'I,1', 'you,1', 'this,1',
            '\"Hello,\",1', '!,1', ''
        ]
        wanted.sort()
        produced = res.output.split('\n')
        produced.sort()
        compare_results(produced, wanted)
Example #19
0
def test_filterwords_custom():
    """A custom stopword file should be honoured (case-insensitively: 'hello' drops 'Hello')."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        input_path = 'in.txt'
        stopwords_path = 'custom.txt'
        create_single_output(input_path,
                             'Hello\nWorld\n!\nI\nam\nnot\na\ncrook\n.')
        create_single_output(stopwords_path, 'hello\n')

        outcome = runner.invoke(filterwords,
                                ['--custom', 'custom.txt', input_path])
        assert outcome.exit_code == 0
        compare_results(outcome.output.split('\n'),
                        ['World', '!', 'crook', '.'])
Example #20
0
def test_filterwords_custom():
    """filterwords --custom filters with a user-supplied stopword list."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        source = 'in.txt'
        words = 'Hello\nWorld\n!\nI\nam\nnot\na\ncrook\n.'
        wanted = ['World', '!', 'crook', '.']
        extra_stops_file = 'custom.txt'
        extra_stops = 'hello\n'

        create_single_output(source, words)
        create_single_output(extra_stops_file, extra_stops)

        res = cli.invoke(filterwords, ['--custom', 'custom.txt', source])
        produced = res.output.split('\n')
        assert res.exit_code == 0
        compare_results(produced, wanted)
Example #21
0
def test_filterlengths():
    """filterlengths drops tokens shorter than the minimum length."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        input_path = 'in.txt'
        create_single_output(input_path, 'Hello\nWorld\n!\nI\n.\nnot\nwin\n')

        # Without options the default minimum length of 3 applies.
        outcome = runner.invoke(filterlengths, [input_path])
        assert outcome.exit_code == 0
        compare_results(outcome.output.split('\n'),
                        ['Hello', 'World', 'not', 'win'])

        # Raising the minimum to 4 leaves only the five-letter words.
        outcome = runner.invoke(filterlengths, ['-m', '4', input_path])
        assert outcome.exit_code == 0
        compare_results(outcome.output.split('\n'), ['Hello', 'World'])
Example #22
0
def test_filterlengths():
    """Tokens below the -m minimum length must be filtered out."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        source = 'in.txt'
        words = 'Hello\nWorld\n!\nI\n.\nnot\nwin\n'

        create_single_output(source, words)

        # Default minimum length is 3: single-character tokens disappear.
        res = cli.invoke(filterlengths, [source])
        produced = res.output.split('\n')
        wanted = ['Hello', 'World', 'not', 'win']
        assert res.exit_code == 0
        compare_results(produced, wanted)

        # Minimum length 4: the three-letter words are dropped as well.
        res = cli.invoke(filterlengths, ['-m', '4', source])
        produced = res.output.split('\n')
        wanted = ['Hello', 'World']
        assert res.exit_code == 0
        compare_results(produced, wanted)