def test_lktb_expect_true(self):
    """look_up_table returns a non-empty result whose last entry has length 2."""
    stats = txtPY.token_stats()
    table = stats.look_up_table(**params_token_stats.lkt_args)

    assert len(table) > 0
    assert len(table[-1]) == 2
def test_print_col_expect_true1(self):
    """print_collocations(word='number') returns a non-empty dict.

    collocation_words is called first on the file referenced by
    params_token_stats.tok_file_parse.
    """
    stats = txtPY.token_stats()

    # The return value is not inspected here; presumably the call prepares
    # the state print_collocations reads -- confirm against txtPY.
    stats.collocation_words(path_2file=params_token_stats.tok_file_parse)
    printed = stats.print_collocations(word='number')

    assert len(printed) > 0
    assert isinstance(printed, dict)
def test_freq_error_handling(self):
    """Each entry of lst_freq makes freq_distribution raise its paired message.

    Entry i of params_token_stats.lst_freq is matched against entry i of
    params_token_stats.msg_freq.
    """
    stats = txtPY.token_stats()

    for idx, bad_kwargs in enumerate(params_token_stats.lst_freq):
        with pytest.raises(Exception) as excinfo:
            stats.freq_distribution(**bad_kwargs)

        # Inspect the captured exception only after the context has exited.
        assert params_token_stats.msg_freq[idx] in str(excinfo.value)
def test_path2vec_expect_true(self):
    """path_2vector returns a non-empty numpy array for both argument sets.

    The FILE_2VEC arguments are exercised first, then FOLDER_2VEC.
    """
    stats = txtPY.token_stats()

    from_file = stats.path_2vector(**params_token_stats.FILE_2VEC)
    assert isinstance(from_file, np.ndarray)
    assert len(from_file) > 0

    from_folder = stats.path_2vector(**params_token_stats.FOLDER_2VEC)
    assert isinstance(from_folder, np.ndarray)
    assert len(from_folder) > 0
def test_lktb_print_expect_true(self):
    """print_words_lookup_tbl(n_gram='ag') returns a non-empty array of strings.

    look_up_table is called first with params_token_stats.lkt_args; only the
    first element of the printed result is type-checked.
    """
    stats = txtPY.token_stats()

    # The return value is unused; the call itself is kept ahead of the print step.
    stats.look_up_table(**params_token_stats.lkt_args)
    words = stats.print_words_lookup_tbl(n_gram='ag')

    assert len(words) > 0
    assert isinstance(words, np.ndarray)
    assert isinstance(words[0], basestring)
def test_lktb_error_handling(self):
    """Each entry of lst_lkt makes look_up_table raise its paired message.

    Entry i of params_token_stats.lst_lkt is matched against entry i of
    params_token_stats.msg_lkt.
    """
    stats = txtPY.token_stats()

    for idx, bad_kwargs in enumerate(params_token_stats.lst_lkt):
        with pytest.raises(Exception) as excinfo:
            stats.look_up_table(**bad_kwargs)

        # Inspect the captured exception only after the context has exited.
        assert params_token_stats.msg_lkt[idx] in str(excinfo.value)
def test_print_col_error_handling(self):
    """Each entry of lst_col makes collocation_words raise its paired message.

    Entry i of params_token_stats.lst_col is matched against entry i of
    params_token_stats.msg_col.
    """
    stats = txtPY.token_stats()

    for idx, bad_kwargs in enumerate(params_token_stats.lst_col):
        with pytest.raises(Exception) as excinfo:
            stats.collocation_words(**bad_kwargs)

        # Inspect the captured exception only after the context has exited.
        assert params_token_stats.msg_col[idx] in str(excinfo.value)
def test_print_count_expect_true(self):
    """print_count_character returns a non-empty numpy array.

    count_character is called first with params_token_stats.cnt_in_lst, then
    print_count_character with params_token_stats.print_cnt.
    """
    stats = txtPY.token_stats()

    # The return value is unused; the call itself is kept ahead of the print step.
    stats.count_character(**params_token_stats.cnt_in_lst)
    printed = stats.print_count_character(**params_token_stats.print_cnt)

    assert len(printed) > 0
    assert isinstance(printed, np.ndarray)
def test_count_error_handling(self):
    """Each entry of lst_cnt makes count_character raise its paired message.

    Entry i of params_token_stats.lst_cnt is matched against entry i of
    params_token_stats.msg_cnt.
    """
    stats = txtPY.token_stats()

    for idx, bad_kwargs in enumerate(params_token_stats.lst_cnt):
        with pytest.raises(Exception) as excinfo:
            stats.count_character(**bad_kwargs)

        # Inspect the captured exception only after the context has exited.
        assert params_token_stats.msg_cnt[idx] in str(excinfo.value)
def test_path2vec_error_handling(self):
    """Each entry of lst_path2vec makes path_2vector raise its paired message.

    Entry i of params_token_stats.lst_path2vec is matched against entry i of
    params_token_stats.msg_path2vec.
    """
    stats = txtPY.token_stats()

    for idx, bad_kwargs in enumerate(params_token_stats.lst_path2vec):
        with pytest.raises(Exception) as excinfo:
            stats.path_2vector(**bad_kwargs)

        # Inspect the captured exception only after the context has exited.
        assert params_token_stats.msg_path2vec[idx] in str(excinfo.value)
def test_dism_error_handling(self):
    """Each entry of lst_dis makes string_dissimilarity_matrix raise its message.

    Entry i of params_token_stats.lst_dis is matched against entry i of
    params_token_stats.msg_dis.
    """
    stats = txtPY.token_stats()

    for idx, bad_kwargs in enumerate(params_token_stats.lst_dis):
        with pytest.raises(Exception) as excinfo:
            stats.string_dissimilarity_matrix(**bad_kwargs)

        # Inspect the captured exception only after the context has exited.
        assert params_token_stats.msg_dis[idx] in str(excinfo.value)
def test_count_expect_true(self):
    """count_character returns a non-empty numpy array for each argument set.

    Exercised with cnt_in_lst, cnt_in_fold and cnt_in_file, in that order;
    all three calls complete before any result is checked.
    """
    stats = txtPY.token_stats()

    counts = [
        stats.count_character(**params_token_stats.cnt_in_lst),
        stats.count_character(**params_token_stats.cnt_in_fold),
        stats.count_character(**params_token_stats.cnt_in_file),
    ]

    for counted in counts:
        assert len(counted) > 0
        assert isinstance(counted, np.ndarray)
def test_freq_expect_true(self):
    """freq_distribution returns results with positive sizes in both dimensions.

    The freq_in_lst, freq_in_fold and freq_in_file results must be non-empty
    along shape[0] and shape[1]; the freq_in_lst_keep result must have
    shape[0] equal to 10.
    """
    stats = txtPY.token_stats()

    from_list = stats.freq_distribution(**params_token_stats.freq_in_lst)
    from_folder = stats.freq_distribution(**params_token_stats.freq_in_fold)
    from_file = stats.freq_distribution(**params_token_stats.freq_in_file)
    from_list_keep = stats.freq_distribution(
        **params_token_stats.freq_in_lst_keep)

    for freq in (from_list, from_folder, from_file):
        assert freq.shape[0] > 0
        assert freq.shape[1] > 0

    assert from_list_keep.shape[0] == 10
    assert from_list_keep.shape[1] > 0
def test_dism_expect_true(self):
    """string_dissimilarity_matrix returns square results of the expected size.

    The dism_dice and dism_lev argument sets must give 30 x 30 results and
    dism_cos a 4 x 4 result. All three matrices are computed before any
    shape is checked.
    """
    stats = txtPY.token_stats()

    dice = stats.string_dissimilarity_matrix(**params_token_stats.dism_dice)
    lev = stats.string_dissimilarity_matrix(**params_token_stats.dism_lev)
    cos = stats.string_dissimilarity_matrix(**params_token_stats.dism_cos)

    for matrix, expected_size in ((dice, 30), (lev, 30), (cos, 4)):
        assert matrix.shape[0] == expected_size
        assert matrix.shape[1] == expected_size
def test_print_col_expect_true(self):
    """collocation_words gives equally sized, non-empty results for both inputs.

    One call receives the output of tokenizer.transform_text as x_vector; the
    other reads from the path in params_token_stats.tok_file_parse.
    """
    tokenizer = txtPY.tokenizer()
    tokens = tokenizer.transform_text(
        input_string=params_token_stats.tok_file,
        to_lower=True,
        split_string=True,
        min_n_gram=3,
        max_n_gram=3,
        n_gram_delimiter="_",
    )

    stats = txtPY.token_stats()
    from_vector = stats.collocation_words(x_vector=tokens)
    from_file = stats.collocation_words(
        path_2file=params_token_stats.tok_file_parse)

    assert len(from_vector) == len(from_file)
    assert len(from_file) != 0