def test_punc_multifile():
    """Invoke ``text2punc`` on two input files and check the tokens it prints.

    The input files are created inside an isolated filesystem, the command
    is run with both filenames as arguments, and its newline-split output is
    handed to ``compare_results`` together with the expected tokens.
    """
    # NOTE(review): another function with this exact name is defined later in
    # this file; the later definition rebinds the name, so this one is
    # shadowed at import time. Confirm which copy is meant to stay.
    paths = ['in.txt', 'in2.txt']
    contents = [
        'Hello\nWorld\n!\nI\nlove,\ngo\n.',
        'Goodbye World!\n I...\n know everything\'s about you?',
    ]
    wanted = ['!', ',', '.', '!', '...', "'", '?']

    cli = CliRunner()
    with cli.isolated_filesystem():
        create_multifile_output(paths, contents)
        outcome = cli.invoke(text2punc, paths)

        assert outcome.exit_code == 0
        compare_results(outcome.output.split('\n'), wanted)
def test_nonewlines_multifile():
    """Invoke ``nonewlines`` on two input files and check the joined output.

    The input files are created inside an isolated filesystem and the
    command is run with both filenames as arguments. The test asserts a zero
    exit code, that the output splits on newlines into exactly two pieces,
    and then passes those pieces to ``compare_results`` together with the
    single expected line.
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        filenames = ['in.txt', 'in2.txt']
        sentences = [
            'Hello\nWorld\n!\nI\nam\nin.',
            'What are you\na creature\nof mystery',
        ]
        expected_tokens = [
            'Hello World ! I am in. What are you a creature of mystery',
        ]

        create_multifile_output(filenames, sentences)
        result = runner.invoke(nonewlines, filenames)

        # Check the exit status before looking at the output, so a crashed
        # command is reported as such rather than as an output mismatch.
        assert result.exit_code == 0

        tokens = result.output.split('\n')
        # Two pieces: presumably the single joined line plus the empty string
        # left after a trailing newline -- confirm against the command.
        assert len(tokens) == 2
        compare_results(tokens, expected_tokens)
def test_punc_multifile():
    """Check the punctuation tokens ``text2punc`` prints for two input files.

    Both files are written inside an isolated filesystem, the command is
    invoked with the two filenames, and the output (split on newlines) is
    compared against the expected tokens via ``compare_results``.
    """
    # NOTE(review): an earlier function in this file has the same name; this
    # later definition rebinds it, so only one of the two is effectively
    # defined. Confirm the duplication is intended.
    runner = CliRunner()
    with runner.isolated_filesystem():
        input_names = ['in.txt', 'in2.txt']
        first_text = 'Hello\nWorld\n!\nI\nlove,\ngo\n.'
        second_text = 'Goodbye World!\n I...\n know everything\'s about you?'
        expected = ['!', ',', '.', '!', '...', "'", '?']

        create_multifile_output(input_names, [first_text, second_text])
        invocation = runner.invoke(text2punc, input_names)
        produced = invocation.output.split('\n')

        assert invocation.exit_code == 0
        compare_results(produced, expected)
def test_text2words_multifile():
    """Invoke ``text2words`` on two input files and check the printed tokens.

    The input files are created inside an isolated filesystem, the command
    is run with both filenames as arguments, and its newline-split output is
    handed to ``compare_results`` together with the expected tokens.
    """
    # NOTE(review): another function with this exact name is defined later in
    # this file; the later definition rebinds the name, so this one is
    # shadowed at import time. Confirm which copy is meant to stay.
    paths = ['in.txt', 'in2.txt']
    contents = (
        'Hello World!\nI.\nnot sure where to go',
        'Goodbye World!\n I.\n know everything about you',
    )
    wanted = [
        'Hello', 'World', '!', 'I.', 'not', 'sure', 'where', 'to', 'go',
        'Goodbye', 'World', '!', 'I.', 'know', 'everything', 'about', 'you',
    ]

    cli = CliRunner()
    with cli.isolated_filesystem():
        create_multifile_output(paths, contents)
        outcome = cli.invoke(text2words, paths)

        assert outcome.exit_code == 0
        compare_results(outcome.output.split('\n'), wanted)
def test_text2words_multifile():
    """Check the tokens ``text2words`` prints for two input files.

    Both files are written inside an isolated filesystem, the command is
    invoked with the two filenames, and the output (split on newlines) is
    compared against the expected tokens via ``compare_results``.
    """
    # NOTE(review): an earlier function in this file has the same name; this
    # later definition rebinds it, so only one of the two is effectively
    # defined. Confirm the duplication is intended.
    runner = CliRunner()
    with runner.isolated_filesystem():
        input_names = ['in.txt', 'in2.txt']
        first_text = 'Hello World!\nI.\nnot sure where to go'
        second_text = 'Goodbye World!\n I.\n know everything about you'
        expected_first = [
            'Hello', 'World', '!', 'I.', 'not', 'sure', 'where', 'to', 'go',
        ]
        expected_second = [
            'Goodbye', 'World', '!', 'I.', 'know', 'everything', 'about',
            'you',
        ]

        # The inputs are passed as a tuple, as in the original call.
        create_multifile_output(input_names, (first_text, second_text))
        invocation = runner.invoke(text2words, input_names)
        produced = invocation.output.split('\n')

        assert invocation.exit_code == 0
        compare_results(produced, expected_first + expected_second)