Exemple #1
0
def AIC_predict():
    """Run the PIC tagging stage followed by the AIC stage in result mode.

    Prints the current ``config.iter``, then uses one ``Tagger`` for PIC
    tagging and a second, separate ``Tagger`` for AIC evaluation and tagging.
    Nothing is returned; the value produced by ``main_taggingAIC`` is
    discarded.

    According to the original author's notes (not verifiable from this
    function alone):
      * PIC stage: reads ``config.result_input_path`` and writes
        ``config.result_processed_path``.
      * AIC stage: reads ``config.result_processed_path`` and writes
        ``config.result_output_path``.
    """
    print("iter : ", config.iter)

    # Stage 1: PIC processing of the morphologically analyzed raw sentences.
    pic_stage = Tagger()
    pic_stage.taggingPIC("result_tagging")

    # Stage 2: apply AIC to the PIC-processed raw sentences. A fresh Tagger
    # is built for this stage instead of reusing the PIC one.
    aic_stage = Tagger()
    aic_stage.evaluateAIC("result")
    aic_stage.main_taggingAIC(mode="result_tagging")
Exemple #2
0
def AIC_train():
    """Run the bootstrapping training loop for the AIC model.

    Each round:
      1. trains and evaluates a main ``Tagger``, then self-tags with it;
      2. splits the self-tagged result with ``split_self_labeling``;
      3. for every bagging model index ``1..config.model_num``: trains and
         evaluates a bagging ``Tagger``, calls ``bagging_bootstrap`` with
         that model's split, and accumulates its self-tagging scores;
      4. converts the accumulated scores to tags with ``score2tag`` (called
         on the last bagging tagger) and passes them to ``main_bootstrap``;
      5. increments ``config.iter``.

    Original author's note: the training data used for bootstrapping must
    already have been PIC-processed via "PIC_train".

    Returns nothing.
    """
    # NOTE(review): range(1, boot_iter) performs boot_iter - 1 rounds -
    # confirm that this is the intended number of bootstrapping rounds.
    for _ in range(1, config.boot_iter):
        print("iter : ", config.iter)

        main_tagger = Tagger()
        main_tagger.main_trainAIC()
        main_tagger.evaluateAIC("main_model")
        # Initial self-training data produced by the main model.
        self_tagged = main_tagger.main_taggingAIC(mode="self_tagging")

        # Train the bagging models.
        label_parts, feature_parts, sentence_parts = split_self_labeling(
            self_tagged[0], self_tagged[1], self_tagged[2])
        summary = "%s_iter(main) -> self_labled_s1 : %s" % (
            config.iter, len(self_tagged[0]))
        print(summary)

        for model_idx in range(1, config.model_num + 1):
            # Model indices are 1-based; the split lists are 0-based.
            part = model_idx - 1
            print("model_idx : ", model_idx)

            bagging_tagger = Tagger()
            bagging_tagger.bagging_trainAIC("bagging_train", model_idx,
                                            feature_parts[part],
                                            label_parts[part])
            print("bagging model%s acc" % model_idx)
            # Evaluate each bagging model individually.
            bagging_tagger.evaluateAIC("bagging_eval", model_idx)
            bagging_bootstrap(model_idx, sentence_parts[part],
                              label_parts[part])

            tagged = bagging_tagger.bagging_taggingAIC("self_tagging",
                                                       model_idx)
            if model_idx == 1:
                # The first model seeds the running totals.
                summed_scores, sentences, feats = tagged
                continue

            # NOTE(review): the first model's result is unpacked as a
            # 3-tuple, whereas later results are iterated element by
            # element and added position-wise to the running scores -
            # confirm what bagging_taggingAIC returns for model_idx > 1.
            for idx, extra in enumerate(tagged):
                summed_scores[idx] = (np.asarray(summed_scores[idx]) +
                                      np.asarray(extra))

        # Uses the tagger left over from the last inner-loop iteration.
        predicts = bagging_tagger.score2tag(summed_scores, sentences, feats)
        main_bootstrap(predicts)
        config.iter += 1
Exemple #3
0
from data_utils import  load_cluster, load_weight_matrix
from config import Config
from tagger import Tagger

# Script entry: populate the shared configuration, then run the PIC and AIC
# tagging stages in "result" mode.
# NOTE(review): `Config` is bound as-is (not called), so the attributes set
# below are stored on whatever object `Config` is - presumably the class
# itself, used as a shared namespace; confirm against config.py.
config = Config

# config.word2cluster : dict of predicate clusters, pre-classified with the K-Means algorithm
# config.weight_matrix : each weight_matrix used for weighted voting
# config.label2idx, config.idx2label : answer (label) dicts
config.word2cluster = load_cluster()
config.weight_matrix, config.label2idx, config.idx2label = load_weight_matrix()


# Apply PIC processing to the morphologically analyzed raw_sentence
# input : config.result_input_path
# output : config.result_processed_path
print("iter : ", config.iter)
main_tagger_PIC = Tagger()
main_tagger_PIC.taggingPIC("result_tagging")

# Apply AIC to the PIC-processed raw_sentence
# input : config.result_processed_path
# output : config.result_output_path
main_tagger_AIC = Tagger()
main_tagger_AIC.evaluateAIC("result")
main_tagger_AIC.main_taggingAIC(mode="result_tagging")