Python token_stats 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: textTinyPy

메소드/함수: token_stats

hotexamples.com에서의 예제들: 15

Python token_stats - 15개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python textTinyPy.token_stats의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

0

파일 보기

파일: test_token_stats.py 프로젝트: mlampros/textTinyPy

    def test_lktb_expect_true(self):
        """Check the result of ``look_up_table`` for the ``lkt_args`` fixture.

        The result must be non-empty and its last entry must have
        length 2.
        """
        stats = txtPY.token_stats()
        table = stats.look_up_table(**params_token_stats.lkt_args)

        assert len(table) > 0
        assert len(table[-1]) == 2

예제 #2

0

파일 보기

파일: test_token_stats.py 프로젝트: mlampros/textTinyPy

    def test_print_col_expect_true1(self):
        """Check that ``print_collocations`` returns a non-empty dict.

        ``collocation_words`` is run on the ``tok_file_parse`` path first,
        on the same object, and then the word ``'number'`` is queried.
        """
        stats = txtPY.token_stats()

        # The return value is not inspected in this test; the call is kept
        # because the query below runs on the same object.
        # NOTE(review): presumably print_collocations reads state stored by
        # this call - confirm against the library.
        stats.collocation_words(path_2file=params_token_stats.tok_file_parse)

        collocations = stats.print_collocations(word='number')

        assert len(collocations) > 0
        assert isinstance(collocations, dict)

예제 #3

0

파일 보기

파일: test_token_stats.py 프로젝트: mlampros/textTinyPy

    def test_freq_error_handling(self):
        """Check the error messages raised by ``freq_distribution``.

        Each keyword set in ``params_token_stats.lst_freq`` must make the
        call raise, and the exception text must contain the entry of
        ``params_token_stats.msg_freq`` at the same position.
        """
        stats = txtPY.token_stats()

        for position, bad_kwargs in enumerate(params_token_stats.lst_freq):
            with pytest.raises(Exception) as excinfo:
                stats.freq_distribution(**bad_kwargs)

            expected_msg = params_token_stats.msg_freq[position]
            assert expected_msg in str(excinfo.value)

예제 #4

0

파일 보기

파일: test_token_stats.py 프로젝트: mlampros/textTinyPy

    def test_path2vec_expect_true(self):
        """Check ``path_2vector`` for the file and the folder fixtures.

        For both ``FILE_2VEC`` and ``FOLDER_2VEC`` the result must be a
        non-empty ``np.ndarray``.
        """
        stats = txtPY.token_stats()

        # File fixture first, then folder fixture; each result is checked
        # right after its own call.
        fixtures = (
            params_token_stats.FILE_2VEC,
            params_token_stats.FOLDER_2VEC,
        )
        for kwargs in fixtures:
            vector = stats.path_2vector(**kwargs)

            assert isinstance(vector, np.ndarray) and len(vector) > 0

예제 #5

0

파일 보기

파일: test_token_stats.py 프로젝트: mlampros/textTinyPy

    def test_lktb_print_expect_true(self):
        """Check the result of ``print_words_lookup_tbl`` for n-gram ``'ag'``.

        ``look_up_table`` is run first on the same object. The printed
        result must be a non-empty ``np.ndarray`` whose first element is
        a string.
        """
        stats = txtPY.token_stats()

        # The return value is not inspected in this test; the call is kept
        # because the query below runs on the same object.
        stats.look_up_table(**params_token_stats.lkt_args)

        words = stats.print_words_lookup_tbl(n_gram='ag')

        assert len(words) > 0
        assert isinstance(words, np.ndarray)
        # NOTE(review): ``basestring`` is a Python 2 builtin; presumably it
        # is aliased elsewhere in this module for Python 3 - confirm.
        assert isinstance(words[0], basestring)

예제 #6

0

파일 보기

파일: test_token_stats.py 프로젝트: mlampros/textTinyPy

    def test_lktb_error_handling(self):
        """Check the error messages raised by ``look_up_table``.

        Each keyword set in ``params_token_stats.lst_lkt`` must make the
        call raise, and the exception text must contain the entry of
        ``params_token_stats.msg_lkt`` at the same position.
        """
        stats = txtPY.token_stats()

        for position, bad_kwargs in enumerate(params_token_stats.lst_lkt):
            with pytest.raises(Exception) as excinfo:
                stats.look_up_table(**bad_kwargs)

            expected_msg = params_token_stats.msg_lkt[position]
            assert expected_msg in str(excinfo.value)

예제 #7

0

파일 보기

파일: test_token_stats.py 프로젝트: mlampros/textTinyPy

    def test_print_col_error_handling(self):
        """Check the error messages raised by ``collocation_words``.

        Each keyword set in ``params_token_stats.lst_col`` must make the
        call raise, and the exception text must contain the entry of
        ``params_token_stats.msg_col`` at the same position.
        """
        stats = txtPY.token_stats()

        for position, bad_kwargs in enumerate(params_token_stats.lst_col):
            with pytest.raises(Exception) as excinfo:
                stats.collocation_words(**bad_kwargs)

            expected_msg = params_token_stats.msg_col[position]
            assert expected_msg in str(excinfo.value)

예제 #8

0

파일 보기

파일: test_token_stats.py 프로젝트: mlampros/textTinyPy

    def test_print_count_expect_true(self):
        """Check that ``print_count_character`` returns a non-empty array.

        ``count_character`` is run first on the same object with the
        ``cnt_in_lst`` fixture; the printed result for ``print_cnt`` must
        be a non-empty ``np.ndarray``.
        """
        stats = txtPY.token_stats()

        # The return value is not inspected in this test; the call is kept
        # because the query below runs on the same object.
        stats.count_character(**params_token_stats.cnt_in_lst)

        printed = stats.print_count_character(**params_token_stats.print_cnt)

        # The length check is stated once; repeating it after the type
        # check cannot change the outcome.
        assert len(printed) > 0
        assert isinstance(printed, np.ndarray)

예제 #9

0

파일 보기

파일: test_token_stats.py 프로젝트: mlampros/textTinyPy

    def test_count_error_handling(self):
        """Check the error messages raised by ``count_character``.

        Each keyword set in ``params_token_stats.lst_cnt`` must make the
        call raise, and the exception text must contain the entry of
        ``params_token_stats.msg_cnt`` at the same position.
        """
        stats = txtPY.token_stats()

        for position, bad_kwargs in enumerate(params_token_stats.lst_cnt):
            with pytest.raises(Exception) as excinfo:
                stats.count_character(**bad_kwargs)

            expected_msg = params_token_stats.msg_cnt[position]
            assert expected_msg in str(excinfo.value)

예제 #10

0

파일 보기

파일: test_token_stats.py 프로젝트: mlampros/textTinyPy

    def test_path2vec_error_handling(self):
        """Check the error messages raised by ``path_2vector``.

        Each keyword set in ``params_token_stats.lst_path2vec`` must make
        the call raise, and the exception text must contain the entry of
        ``params_token_stats.msg_path2vec`` at the same position.
        """
        stats = txtPY.token_stats()

        for position, bad_kwargs in enumerate(
                params_token_stats.lst_path2vec):
            with pytest.raises(Exception) as excinfo:
                stats.path_2vector(**bad_kwargs)

            expected_msg = params_token_stats.msg_path2vec[position]
            assert expected_msg in str(excinfo.value)

예제 #11

0

파일 보기

파일: test_token_stats.py 프로젝트: mlampros/textTinyPy

    def test_dism_error_handling(self):
        """Check the error messages raised by ``string_dissimilarity_matrix``.

        Each keyword set in ``params_token_stats.lst_dis`` must make the
        call raise, and the exception text must contain the entry of
        ``params_token_stats.msg_dis`` at the same position.
        """
        stats = txtPY.token_stats()

        for position, bad_kwargs in enumerate(params_token_stats.lst_dis):
            with pytest.raises(Exception) as excinfo:
                stats.string_dissimilarity_matrix(**bad_kwargs)

            expected_msg = params_token_stats.msg_dis[position]
            assert expected_msg in str(excinfo.value)

예제 #12

0

파일 보기

파일: test_token_stats.py 프로젝트: mlampros/textTinyPy

    def test_count_expect_true(self):
        """Check ``count_character`` for the list, folder and file fixtures.

        Every result must be a non-empty ``np.ndarray``.
        """
        stats = txtPY.token_stats()

        fixtures = (
            params_token_stats.cnt_in_lst,
            params_token_stats.cnt_in_fold,
            params_token_stats.cnt_in_file,
        )

        # All three calls are made before any result is checked.
        counts = [stats.count_character(**kwargs) for kwargs in fixtures]

        for chars in counts:
            # The length check is stated once; repeating it after the type
            # check cannot change the outcome.
            assert len(chars) > 0
            assert isinstance(chars, np.ndarray)

예제 #13

0

파일 보기

파일: test_token_stats.py 프로젝트: mlampros/textTinyPy

    def test_freq_expect_true(self):
        """Check the shape of ``freq_distribution`` results.

        The list, folder and file fixtures must each produce a result
        with a positive size along both of its first two axes. The
        ``freq_in_lst_keep`` fixture must produce exactly 10 entries
        along the first axis and a positive size along the second.
        """
        stats = txtPY.token_stats()

        from_list = stats.freq_distribution(**params_token_stats.freq_in_lst)
        from_folder = stats.freq_distribution(
            **params_token_stats.freq_in_fold)
        from_file = stats.freq_distribution(**params_token_stats.freq_in_file)
        kept = stats.freq_distribution(**params_token_stats.freq_in_lst_keep)

        for table in (from_list, from_folder, from_file):
            assert table.shape[0] > 0
            assert table.shape[1] > 0

        assert kept.shape[0] == 10
        assert kept.shape[1] > 0

예제 #14

0

파일 보기

파일: test_token_stats.py 프로젝트: mlampros/textTinyPy

    def test_dism_expect_true(self):
        """Check the shape of ``string_dissimilarity_matrix`` results.

        The ``dism_dice`` and ``dism_lev`` fixtures must give 30 x 30
        results and the ``dism_cos`` fixture a 4 x 4 result.
        """
        stats = txtPY.token_stats()

        # (fixture keyword arguments, expected size along each axis)
        cases = (
            (params_token_stats.dism_dice, 30),
            (params_token_stats.dism_lev, 30),
            (params_token_stats.dism_cos, 4),
        )

        # All matrices are computed before any shape is checked.
        matrices = [
            stats.string_dissimilarity_matrix(**kwargs)
            for kwargs, _ in cases
        ]

        for matrix, (_, side) in zip(matrices, cases):
            assert matrix.shape[0] == side
            assert matrix.shape[1] == side

예제 #15

0

파일 보기

파일: test_token_stats.py 프로젝트: mlampros/textTinyPy

    def test_print_col_expect_true(self):
        """Compare ``collocation_words`` for vector input and file input.

        The ``tok_file`` fixture is transformed with the tokenizer
        (lower-cased, split, n-gram range 3 to 3 joined by ``"_"``) and
        passed as ``x_vector``; ``tok_file_parse`` is passed as
        ``path_2file``. Both results must have the same non-zero length.
        """
        tokenizer = txtPY.tokenizer()
        tokens = tokenizer.transform_text(
            input_string=params_token_stats.tok_file,
            to_lower=True,
            split_string=True,
            min_n_gram=3,
            max_n_gram=3,
            n_gram_delimiter="_",
        )

        stats = txtPY.token_stats()
        from_vector = stats.collocation_words(x_vector=tokens)
        from_file = stats.collocation_words(
            path_2file=params_token_stats.tok_file_parse)

        assert len(from_vector) == len(from_file)
        assert len(from_file) != 0