예제 #1
0
    def test_big_vocab_single_expect_true(self):
        """Check that a single-vocabulary run lists 'VOCAB_single.txt'.

        Calls ``vocabulary_accumulator`` with
        ``params_big_files.tok_kwargs_single_vocab`` and then asserts that
        'VOCAB_single.txt' appears in the directory listing of
        ``params_big_files.tok_write``.
        """
        accumulator = txtPY.big_text_files()
        single_vocab_kwargs = params_big_files.tok_kwargs_single_vocab

        # The call saves its result to a folder; nothing is returned here.
        accumulator.vocabulary_accumulator(**single_vocab_kwargs)

        assert 'VOCAB_single.txt' in os.listdir(params_big_files.tok_write)
예제 #2
0
    def test_big_parser_expect_true(self):
        """Check that parsing lists both batch files in the write folder.

        Calls ``big_text_parser`` with ``params_big_files.tok_kwargs_parse``
        and then asserts that 'batch1.txt' and 'batch2.txt' each appear in
        the directory listing of ``params_big_files.tok_write``.
        """
        parser = txtPY.big_text_files()
        parse_kwargs = params_big_files.tok_kwargs_parse

        # The call saves its result to a folder; nothing is returned here.
        parser.big_text_parser(**parse_kwargs)

        written = os.listdir(params_big_files.tok_write)
        for batch_name in ('batch1.txt', 'batch2.txt'):
            # One assert per file so a failure names the missing batch.
            assert batch_name in written
예제 #3
0
    def test_big_splitter_error_handling(self):
        """Check that each bad kwargs set makes ``big_text_splitter`` raise.

        For every index into ``params_big_files.tok_kwargs_error_handling``
        the call must raise, and the entry at the same index of
        ``params_big_files.list_of_error_messages`` must be a substring of
        the raised exception's text.
        """
        splitter = txtPY.big_text_files()
        bad_cases = params_big_files.tok_kwargs_error_handling

        # Index-based on purpose: the kwargs and the expected messages live
        # in two separate containers paired up by position.
        for idx in range(len(bad_cases)):
            with pytest.raises(Exception) as raised:
                splitter.big_text_splitter(**bad_cases[idx])

            expected = params_big_files.list_of_error_messages[idx]
            assert expected in str(raised.value)
예제 #4
0
    def test_big_vocab_error_handling(self):
        """Check that each bad kwargs set makes ``vocabulary_accumulator`` raise.

        For every index into ``params_big_files.tok_kwg_VOCAB_error_handling``
        the call must raise, and the entry at the same index of
        ``params_big_files.list_of_error_messages_tok_VOCAB`` must be a
        substring of the raised exception's text.
        """
        accumulator = txtPY.big_text_files()
        bad_cases = params_big_files.tok_kwg_VOCAB_error_handling

        # Index-based on purpose: the kwargs and the expected messages live
        # in two separate containers paired up by position.
        for idx in range(len(bad_cases)):
            with pytest.raises(Exception) as raised:
                accumulator.vocabulary_accumulator(**bad_cases[idx])

            expected = params_big_files.list_of_error_messages_tok_VOCAB[idx]
            assert expected in str(raised.value)
예제 #5
0
    def test_big_tokenizer_expect_true(self):
        """Check that tokenizer runs list the batch files in both folders.

        Runs ``big_text_tokenizer`` with ``tok_kwargs_bigf`` and then
        ``tok_kwargs_bigf_userdef``, and asserts 'batch1.txt' and
        'batch2.txt' are listed in ``params_big_files.tok_write``. It then
        runs it with ``tok_kwargs_bigf_vocab`` and asserts the same two
        names are listed in ``params_big_files.tok_vocab``.
        """
        tokenizer = txtPY.big_text_files()
        batch_names = ('batch1.txt', 'batch2.txt')

        # Both runs happen before the write folder is inspected.
        for run_kwargs in (params_big_files.tok_kwargs_bigf,
                           params_big_files.tok_kwargs_bigf_userdef):
            tokenizer.big_text_tokenizer(**run_kwargs)

        written = os.listdir(params_big_files.tok_write)
        for name in batch_names:
            assert name in written

        # Third run uses the vocab kwargs; its folder is checked separately.
        tokenizer.big_text_tokenizer(**params_big_files.tok_kwargs_bigf_vocab)

        vocab_listing = os.listdir(params_big_files.tok_vocab)
        for name in batch_names:
            assert name in vocab_listing