Exemple #1
0
def _merger_dictionary(name):
    """Merge the dictionary file ``name`` into the module-level tables.

    ``readFile`` is cleared, asked to read ``name``, and then its
    ``unigramDict`` and ``ngramDict`` are folded into the global
    ``UNIGRAM_DICT`` and ``NGRAM_DICT`` respectively (both are mutated in
    place).

    Args:
        name: Path passed unchanged to ``readFile._read_file``.
    """
    readFile.clear()
    readFile._read_file(name)
    # Unigram
    print("Add uni")
    _merge_count_entries(UNIGRAM_DICT, readFile.unigramDict)
    # Ngram
    print("Add nrg ")
    _merge_count_entries(NGRAM_DICT, readFile.ngramDict)


def _merge_count_entries(target, source):
    """Fold every entry of ``source`` into ``target``, mutating ``target``.

    Merge rules, per key:
      * key missing from ``target``: a deep copy of the incoming entry is
        inserted, so later edits to ``target`` never touch ``source``;
      * incoming entry has no ``"count"``: the existing entry is kept as is;
      * both entries have ``"count"``: the counts are added together and
        every other field of the existing entry is left untouched;
      * only the incoming entry has ``"count"``: the existing entry is
        replaced by a deep copy of the incoming one.

    Args:
        target: Mapping of key -> entry that accumulates the result.
        source: Mapping of key -> entry to merge in; it is only read.
    """
    for key, incoming in source.items():
        if key not in target:
            target[key] = copy.deepcopy(incoming)
            continue
        if "count" not in incoming:
            # Nothing to accumulate; keep what is already stored.
            continue
        existing = target[key]
        if "count" in existing:
            existing["count"] = existing["count"] + incoming["count"]
        else:
            target[key] = copy.deepcopy(incoming)
def _main_v3(s):
    """Process every entry of directory ``s`` with the v2 CSV merge routine.

    For each name returned by ``os.listdir(s)`` a progress line
    ``"<name> <index>/<total>"`` (zero-based index) is printed, ``readFile``
    is cleared and asked to read that entry, and the resulting unigram and
    n-gram tables are passed to ``_merge_dic_to_csv_v2`` with the flags
    ``True`` and ``False`` respectively.

    Args:
        s: Directory to scan. A trailing path separator is accepted but no
            longer required, because the entry path is built with
            ``os.path.join``.
    """
    # List the directory once: the listing is loop-invariant, and re-reading
    # it on every iteration only to print the total costs one extra
    # directory scan per file.
    filenames = os.listdir(s)
    total = len(filenames)
    for index, filename in enumerate(filenames):
        print(filename + " {}/{}".format(index, total))
        readFile.clear()
        readFile._read_file(os.path.join(s, filename))
        _merge_dic_to_csv_v2(readFile.unigramDict, True)
        _merge_dic_to_csv_v2(readFile.ngramDict, False)
Exemple #3
0
def _tool():
    """Reload ``DicTest.txt``, run ``_main_v2`` on the corpus folder, save.

    Steps, in order:
      1. Clear ``readFile``, read ``"DicTest.txt"`` and deep-copy its
         unigram and n-gram tables into the globals ``UNIGRAM_DICT`` and
         ``NGRAM_DICT`` (rebinding both).
      2. Print and pass the directory ``path + folder + "/"`` to
         ``_main_v2``.
      3. Call ``readFile._get_probability_uni`` and
         ``readFile._get_probability_ngram`` on the global tables
         (presumably these fill in probabilities; confirm in ``readFile``).
      4. Pass both tables and ``"DicTest.txt"`` to
         ``writeFile._write_dictionary``.
    """
    global UNIGRAM_DICT, NGRAM_DICT

    dictionary_file = "DicTest.txt"
    readFile.clear()
    readFile._read_file(dictionary_file)
    # Deep copies keep the globals independent of readFile's own tables.
    # NOTE: a disabled alternative loaded these tables through
    # _read_dic_to_json from 'unigram_json.txt' / 'ngram_json.txt'.
    UNIGRAM_DICT = copy.deepcopy(readFile.unigramDict)
    NGRAM_DICT = copy.deepcopy(readFile.ngramDict)
    print("start")
    # range(20, 21) yields a single pass. A disabled variant used the index
    # to build one numbered sub-folder path per iteration.
    for _ in range(20, 21):
        corpus_dir = path + folder + "/"
        print(corpus_dir)
        _main_v2(corpus_dir)
    print("get pro uni")
    readFile._get_probability_uni(UNIGRAM_DICT)
    print("get pro ngr")
    readFile._get_probability_ngram(UNIGRAM_DICT, NGRAM_DICT)
    writeFile._write_dictionary(UNIGRAM_DICT, NGRAM_DICT, dictionary_file)
def _first_run_v2():
    """Load ``text.txt`` and hand its tables to ``_merge_dic_to_csv_v2``.

    ``readFile`` is cleared and asked to read ``'text.txt'``; its unigram
    table is then passed with flag ``True`` and its n-gram table with flag
    ``False``, in that order.
    """
    readFile.clear()
    readFile._read_file('text.txt')
    # Attributes are looked up lazily, one per iteration, right before use.
    for table_attr, flag in (("unigramDict", True), ("ngramDict", False)):
        _merge_dic_to_csv_v2(getattr(readFile, table_attr), flag)
def _first_run():
    """Load the main dictionary file and hand it to ``_merge_dic_to_csv``.

    ``readFile`` is cleared and asked to read
    ``'main_vi.dict_chinh_quy_bo_ky_tu.txt'``; its unigram table is then
    passed with flag ``True`` and its n-gram table with flag ``False``, in
    that order.
    """
    readFile.clear()
    readFile._read_file('main_vi.dict_chinh_quy_bo_ky_tu.txt')
    # Attributes are looked up lazily, one per iteration, right before use.
    for table_attr, flag in (("unigramDict", True), ("ngramDict", False)):
        _merge_dic_to_csv(getattr(readFile, table_attr), flag)