def extract_result(text):
    """Collect the sentences of ``text`` that look like a ruling/result.

    The text is split with ``sepearate`` (project helper; presumably a
    sentence splitter -- confirm), each sentence is split into clauses on
    ``","`` and every clause is tokenized with ``jieba``.

    * A clause qualifies when the number of dictionary words (read from
      ``dict/civil_mariiage_result``) found among its tokens, divided by the
      number of tokens, exceeds 0.3.  The sentence is then recorded, trimmed
      to the part after "由此" or ",故" when one of those markers is present,
      and the remaining clauses of that sentence are skipped.
    * A clause that does not qualify still records the whole sentence when
      the sentence contains an amount marker ("元").

    Args:
        text: the document text to analyse.

    Returns:
        list: the recorded (possibly trimmed) sentences, in insertion order.
    """
    sentences = sepearate(text)
    regex = ","
    file_path = Dir.resourceDir+"dict/civil_mariiage_result"
    judge_words = tools.readLines(file_path)
    result = {}
    for sentence in sentences:
        for details_sentence in re.split(regex, sentence):
            words = list(jieba.cut(details_sentence))
            count = sum(1 for word in judge_words if word in words)
            # An empty clause (e.g. produced by a trailing delimiter) has no
            # tokens; treat its ratio as 0 instead of dividing by zero.
            ratio = count / len(words) if words else 0
            if ratio > 0.3:
                words_sent = list(jieba.cut(sentence))
                inter = set(words_sent).intersection(words)
                if "由此" in sentence:
                    sentence_simple = sentence[sentence.find("由此") + 2:]
                elif ",故" in sentence:
                    sentence_simple = sentence[sentence.find(",故") + 2:]
                else:
                    sentence_simple = sentence
                if sentence_simple not in result:
                    # Score = share of the sentence's tokens that also occur
                    # in the qualifying clause (only the keys are returned).
                    result[sentence_simple] = len(inter) / len(words_sent)
                    # One qualifying clause is enough for this sentence.
                    break
            elif re.search(r"\d*?元", sentence):
                # Note: this overwrites any score stored for the same key.
                result[sentence] = "元"
    return list(result)
def extract_laws(text):
    """Return the law citations found in ``text``.

    Every sentence (from ``seperate_sentences``) containing "《" is cut down
    to the span that starts at the first "《" and stops before the first
    "规定".  A trailing "的"/"之" is dropped, and when no "第" follows the
    closing "》" within ten characters the span is truncated right after
    "》".  The span is kept when ``contain_laws_name`` accepts it against the
    ``dict\\LawsName`` word list or when it contains "中华人民共和国".

    Args:
        text: the document text to analyse.

    Returns:
        list: the extracted citation strings, in document order.
    """
    laws_name = tools.readLines(Dir.resourceDir + "dict\\LawsName")
    candidates = [line for line in seperate_sentences(text) if "《" in line]

    result = []
    for candidate in candidates:
        tail = candidate[candidate.find("《"):]
        # NOTE(review): when "规定" is absent find() yields -1, so this slice
        # silently drops the last character instead -- confirm that is wanted.
        words = tail[:tail.find("规定")]
        if not words:
            continue
        if words[-1] in ("的", "之"):
            words = words[:-1]
        close = words.find("》")
        if "第" not in words[close:close + 10]:
            # No article number follows the title: keep only "《...》".
            words = words[:close + 1]
        if contain_laws_name(words, laws_name) or "中华人民共和国" in words:
            result.append(words)
    return result
def extract_from_texts(text_dict, func):
    """Run ``extract_from_text`` over every document of ``text_dict``.

    Args:
        text_dict: mapping of file identifier -> document text.
        func: forwarded unchanged to ``extract_from_text``.

    Returns:
        dict: ``tools.get_filename(file)`` -> extraction result, for every
        document whose extraction result is not ``None``.
    """
    result = {}
    for file, text in text_dict.items():
        tmp = extract_from_text(text, func)
        # Identity check: only a real ``None`` means "nothing extracted".
        if tmp is not None:
            result[tools.get_filename(file)] = tmp
    return result
def observate_sentence_lenth(filepath,
                             savepath=Dir.resourceDir +
                             "\\ObservationResult\\result_"):
    """Report how many sentence segments each document under ``filepath`` has.

    Each document is split on "。", "?" and "!"; the number of resulting
    pieces is the document's length.  A histogram (length -> number of
    documents) is printed, and a ``index<TAB>length`` line per document is
    written to ``savepath + <filename of filepath> + ".txt"``.

    Args:
        filepath: directory handed to ``tools.read_dir``.
        savepath: prefix of the report file path.
    """
    savepath += tools.get_filename(filepath) + ".txt"
    result = tools.read_dir(filepath)
    # "?" must be escaped: a bare "?" after "|" is a quantifier with nothing
    # to repeat and makes the pattern fail to compile.
    splitter = re.compile(r"。|\?|!")
    print(filepath)
    tmp_result = {}
    rows = []
    # NOTE(review): documents are addressed by integer position here --
    # confirm that tools.read_dir returns a sequence for this caller.
    for i in range(len(result)):
        length = len(splitter.split(result[i]))
        tmp_result[length] = tmp_result.get(length, 0) + 1
        rows.append(str(i) + "\t" + str(length) + "\n")
    for key, value in tmp_result.items():
        print(str(key) + "\t" + str(value))
    tools.write(savepath, "".join(rows))
def demo():
    """Extract laws, facts and results for the divorce-dispute corpus.

    For every corpus directory the documents are loaded with
    ``tools.read_dir``; one tab-separated line per document (file name, text,
    laws, facts, results) is assembled and the lines for that directory are
    written to the result file.
    """
    # Further corpus directories can be appended to this list.
    paths = [
        Dir.resourceDir+"\\摘要文书\\离婚纠纷"
    ]
    filepath = Dir.resourceDir+"/结果/离婚纠纷结果/result.txt"
    for path in paths:
        documents = tools.read_dir(path)
        rows = []
        for file, text in documents.items():
            result_laws = extract_laws(text)
            result_fact = extract_fact(text)
            result_result = extract_result(text)
            columns = [
                tools.get_filename(file),
                text,
                str(result_laws),
                str(result_fact),
                str(result_result),
            ]
            rows.append("\t".join(columns) + "\n")

        # Written once per directory, so a later directory overwrites the file.
        tools.write(filepath, "".join(rows))
Exemple #6
0
def demo():
    """Run ``extract`` over the theft corpus and dump the results.

    Each document yields one tab-separated line: file name, text, and the
    first four entries of the ``extract`` result rendered with ``str`` and
    stripped of their first and last character (the list brackets, assuming
    the entries are lists).  Every line is printed and all of
    them are written to the result file at the end.
    """
    # Further corpus directories can be appended to this list.
    paths = [
        Dir.resourceDir + "\\摘要文书\\盗窃罪"
    ]
    lines = []
    for path in paths:
        documents = tools.read_dir(path)
        for file, text in documents.items():
            result = extract(text)

            columns = [tools.get_filename(file), text]
            for position in range(4):
                columns.append(str(result[position])[1:-1])
            string = "\t".join(columns)
            lines.append(string + "\n")
            print(string)

    filepath = Dir.resourceDir + "结果\\盗窃罪结果\\result.txt"
    tools.write(filepath, "".join(lines))
def extract_result(text):
    """Return the offence names that ``text`` says a conviction is for.

    An offence name from ``dict\\guilty_name.txt`` is accepted when, at its
    first occurrence in a sentence, "构成" appears within the ten characters
    before it and the name is followed (within two characters) by "," or
    runs up to the end of the sentence.

    Args:
        text: the document text to analyse.

    Returns:
        list: the distinct accepted offence names (unordered).
    """
    result = set()
    path1 = Dir.resourceDir + "dict\\guilty_name.txt"
    guilty_names = tools.readLines(path1)
    sentences = seperate_sentences(text)
    for sentence in sentences:
        for guilty_name in guilty_names:
            index = sentence.find(guilty_name)
            if index == -1:
                continue
            # Clamp the window start: a negative start would be interpreted
            # relative to the end of the string and yield an empty window
            # whenever the name occurs in the first ten characters.
            before = sentence[max(0, index - 10):index]
            window_end = index + len(guilty_name) + 2
            followed_by_break = ("," in sentence[index:window_end]
                                 or window_end > len(sentence))
            if "构成" in before and followed_by_break:
                result.add(guilty_name)
    return list(result)
def extract_fact_from_guilty(text):
    """Extract the fact description that precedes each offence name.

    For every sentence containing an offence name from
    ``dict\\guilty_name.txt`` the fact is the span from the first "被告人"
    up to the last "," before the name (or up to the name itself when no ","
    precedes it).  Spans of ten characters or fewer are ignored.

    Args:
        text: the document text to analyse.

    Returns:
        list: strings of the form ``"<offence name>:<fact>"``.
    """
    result = []
    sentences = seperate_sentences(text)
    # The dictionary does not depend on the sentence: read it once.
    path1 = Dir.resourceDir + "dict\\guilty_name.txt"
    guilty_names = tools.readLines(path1)
    for sentence in sentences:
        for guilty_name in guilty_names:
            index = sentence.find(guilty_name)
            if index == -1:
                continue
            comma = sentence.rfind(",", 0, index)
            if comma != -1:
                index = comma
            start = sentence.find("被告人", 0, index)
            if start == -1:
                # No defendant mention before the name: slicing from -1
                # would only produce an empty "fact".
                continue
            if index > start + 10:
                result.append(guilty_name + ":" + sentence[start:index])
    return result
def demo(func):
    """Print the results of ``extract_from_texts`` for the assault corpus.

    For every corpus directory one line per document is printed: the result
    key followed by each extracted item, every field terminated by a tab.

    Args:
        func: forwarded unchanged to ``extract_from_texts``.
    """
    # Further corpus directories can be appended to this list.
    paths = [
        Dir.resourceDir + "\\摘要文书\\故意伤害罪"
    ]
    for path in paths:
        text_list = tools.read_dir(path)
        result = extract_from_texts(text_list, func)

        rows = []
        for res, items in result.items():
            fields = "".join(str(tmp) + "\t" for tmp in items)
            rows.append(res + "\t" + fields + "\n")
        print("".join(rows))
def loadWords():
    """Read and return the lines of the ``dict\\civil_marriage`` word list."""
    # Backslash escaped explicitly: "\c" is an invalid escape sequence that
    # newer Python versions warn about; the resulting path is unchanged.
    file = Dir.resourceDir + "dict\\civil_marriage"
    return tools.readLines(file)
Exemple #11
0
        idf = math.log(
            float(count) / float(reversed_index[word].__len__() + 1), math.e)
        if word not in word_idf.keys():
            word_idf[word] = idf

    tfidf = {}
    for index in tf.keys():
        if index not in tfidf.keys():
            tfidf[index] = {}
        words = tf[index]
        for word in words.keys():
            if word not in tfidf[index].keys():
                tfidf[index][word] = [
                    words[word], word_idf[word], words[word] * word_idf[word]
                ]
            print(word, tfidf[index][word])
    return tfidf


# Script section, executed at import time: load every document found under
# the "摘要文书\离婚纠纷" (divorce dispute) corpus directory and pass the loaded
# texts to combineTextAndGetTf.
file_dir = Dir.resourceDir + "摘要文书\\离婚纠纷\\"
text_list = tools.read_dir(file_dir)
combineTextAndGetTf(text_list)
# result =gettfidf(text_list)
# for key in result.keys():
#     string =""
#     for word in result[key].keys():
#         string += word+":"+str(result[key][word])
#         string+="##"
#     string+="\n"
# print(string)