Example #1
0
def update_law(location, location_code, location_level, law_id):
    """Write location details onto one row of the ``law`` table.

    The update is committed immediately. A database error is rolled back and
    reported on stdout; it is not re-raised.

    Args:
        location: Value stored in ``law.location``.
        location_code: Value stored in ``law.location_code``.
        location_level: Value stored in ``law.location_level``.
        law_id: Primary key of the row to update.
    """
    update_sql = "update law set location = %s, location_code = %s, location_level = %s where id = %s"
    cursor = conn.cursor()
    try:
        cursor.execute(update_sql,
                       (location, location_code, location_level, law_id))
        conn.commit()
        print(str(law_id) + '-----------------------UPDATE SUCCESS')
    except Exception as e:
        conn.rollback()
        # str(e) is required: concatenating the exception object itself
        # raises TypeError inside the handler and hides the database error.
        print('\033[1;32;41m' + str(law_id) + ': FAILED---------' + str(e) +
              '\033[0m')
def law_classify():  # Classify law/regulation text files by category
    """Assign every text file in the source folder to a law category.

    Categories come from ``law_class``: column 0 is used as the id, column 1
    as the category name and column 2 as a comma-separated keyword list.
    For each file whose base name (text before the first '.') equals a
    ``law.text_name``, the first category having a keyword contained in that
    base name wins; when none matches, the category '其他' is used. The
    category name is written to ``law.type`` and a ``(law_id, class_id)`` row
    is inserted into ``law_to_class``. Each statement is committed on its
    own; failures are rolled back and printed.

    Raises:
        KeyError: if the fallback category '其他' is needed but does not
            exist in ``law_class``.
    """
    dir_path = "C:\\Users\\dhz1216\\Desktop\\wenben"
    class_select_sql = "select * from law_class"  # load the category table
    law_select_sql = "select id from law where text_name = %s"  # law id by text name
    update_sql = "update law set type = %s where id = %s"  # set law.type by id
    insert_sql = "insert into law_to_class (law_id, class_id) value (%s, %s)"  # law/category link row

    cursor = conn.cursor()
    cursor.execute(class_select_sql)
    class_id_dict = {}  # category name -> category id
    class_keyword_dict = {}  # category name -> keywords
    for res in cursor.fetchall():
        class_id_dict[res[1]] = res[0]
        # A NULL keyword column means "no keywords" (str(None) would yield the
        # bogus keyword 'None'), and empty keywords are dropped because ''
        # is a substring of every file name and would match everything.
        raw_keywords = '' if res[2] is None else str(res[2])
        class_keyword_dict[res[1]] = [w for w in raw_keywords.split(',') if w]

    for file in os.listdir(dir_path):
        text_name = file.split('.')[0]
        # Parameters are passed as a real one-element tuple.
        cursor.execute(law_select_sql, (text_name,))
        law = cursor.fetchone()
        if law is None:
            continue
        law_id = law[0]

        class_type = ''
        for name, keywords in class_keyword_dict.items():
            if any(word in text_name for word in keywords):
                class_type = name
                break
        if not class_type:
            class_type = '其他'
        class_id = class_id_dict[class_type]  # id of the chosen category

        try:
            cursor.execute(update_sql, (class_type, law_id))
            conn.commit()
            try:
                cursor.execute(insert_sql, (law_id, class_id))
                conn.commit()
            except Exception as e:
                conn.rollback()
                # str(e): concatenating the exception object raises TypeError.
                print('\033[1;32;41m' + text_name + str(e) +
                      ': INSERT FAILED---------' + '\033[0m')
            print(text_name + '--------success--------' + class_type)
        except Exception as e:
            conn.rollback()
            print('\033[1;32;41m' + text_name + str(e) +
                  ': UPDATE FAILED---------' + '\033[0m')
        print(text_name + '------------------' + class_type + str(class_id))
Example #3
0
def chapter_article_parser(file_path):      # Parse and format texts organised as "第xx条" articles
    """Split a law text into "第…章" / "第…条" headed sections and store them.

    The file name is the last backslash-separated component of ``file_path``;
    its text before the first '.' is used as ``law.text_name`` to look up the
    law id. If that lookup raises, the error is printed and the function
    returns without parsing.

    Lines before the one starting with '【法规全文】' are skipped. From that
    line on, every line whose left-stripped text begins with the heading
    pattern opens a section: the matched heading becomes ``p_key`` and the
    remainder of the line plus all following lines up to the next heading
    become ``p_content``. Each section is appended to a file in the
    hard-coded output folder and inserted into ``law_content_parse``
    (committed per row; failures are printed and rolled back).

    Args:
        file_path: Backslash-separated path of the text file; it is read as
            GBK with undecodable bytes ignored.
    """
    file_name = file_path.split('\\')[-1]
    write_path = "C:\\Users\\dhz1216\\Desktop\\washing\\第一类\\" + file_name
    # Look up this text's id in the law table; the law name is the file name
    # without its extension.
    law_name = file_name.split('.')[0]
    try:
        cursor = conn.cursor()
        select_sql = "select id from law where text_name = %s"
        # NOTE(review): (x) is a plain scalar, not a tuple — whether the
        # driver accepts a scalar parameter should be confirmed.
        cursor.execute(select_sql, (file_name.split('.')[0]))
        law_id = cursor.fetchone()[0]
    except Exception as e:
        # Also reached when no row matches (fetchone() returns None).
        print(e)
        return

    with open(file_path, "r", encoding='gbk', errors='ignore') as f:
        line = f.readline()
        # Heading: "第" + shortest possible text + "章" or "条".
        pattern = re.compile("第(.*?)(?:章|条)")
        while line:
            if line.startswith('【法规全文】'):
                # The marker line itself may already carry the first heading.
                line = line.replace('【法规全文】', '')
                with open(write_path, "a") as w:
                    # This loop consumes the rest of the file, so the outer
                    # loop ends as soon as it finishes.
                    while line:
                        match = pattern.match(line.lstrip())
                        if match:
                            p_key = match.group()
                            # replace() removes every occurrence of the
                            # heading text in the line, not only the prefix.
                            p_content = line.replace(match.group(), '').lstrip()
                            line = f.readline()
                            # Collect continuation lines until the next heading.
                            while line:
                                match = pattern.match(line.lstrip())
                                if not match:
                                    p_content = p_content + line
                                    line = f.readline()
                                else:
                                    break
                            w.write(p_key + ':  ' + p_content + '\n')
                            insert_sql = "insert into law_content_parse (law_id, p_key, p_content, law_name) " \
                                         "value (%s, %s, %s, %s)"
                            try:
                                cursor.execute(insert_sql, (law_id, p_key, p_content, law_name))
                                conn.commit()
                                print(file_name + ': PARSE SUCCESS')
                            except Exception as e:
                                print(e)
                                conn.rollback()
                                print('\033[1;32;41m' + file_name + ': PARSE FAILED---------' + '\033[0m')
                        else:
                            # Text before the first heading is discarded.
                            line = f.readline()
            else:
                line = f.readline()
def science_spot_parser(file_path):
    """Split a text into "title content" entries and store them.

    The file name is the last backslash-separated component of ``file_path``;
    its text before the first '.' is used as ``law.text_name`` to find the
    law id. Lines before the one starting with '【法规全文】' are skipped.
    After that, a line whose left-stripped text contains a space starts an
    entry: the text before the first space is the key and the second
    space-separated piece is the start of the value. Following lines without
    a space are appended to the value. Each entry is appended to a file in
    the hard-coded output folder and inserted into ``law_content`` with
    ``law_class`` '风景名胜'. The number of inserted rows is printed at the
    end.

    Args:
        file_path: Backslash-separated path of the text file; it is read as
            GBK with undecodable bytes ignored.
    """
    dir_path = "C:\\Users\\dhz1216\\Desktop\\washing\\风景名胜"
    file_name = file_path.split("\\")[-1]
    cursor = conn.cursor()
    select_sql = "select id, name from law where text_name = %s"
    # NOTE(review): the parameter is a scalar, not a tuple, and the lookup is
    # unguarded — fetchone() returns None when no law matches, so the
    # subscript below raises TypeError.
    cursor.execute(select_sql, (file_name.split('.')[0]))
    law_id = cursor.fetchone()[0]
    count = 0  # number of rows inserted successfully
    with open(file_path, "r", encoding='gbk', errors='ignore') as f:
        line = f.readline()
        while line:
            if line.startswith('【法规全文】'):
                with open(dir_path + "\\" + file_name, "a") as w:
                    # The marker line itself is parsed too, minus the marker.
                    line = line.replace('【法规全文】', '')
                    # line = f.readline()
                    # This loop consumes the rest of the file.
                    while line:
                        if len(line.lstrip().split(' ')) > 1:
                            key_title = line.lstrip().split(' ')[0]
                            # Only the second piece is kept; pieces after a
                            # further space on the same line are dropped.
                            value_content = line.lstrip().split(' ')[1]
                            line = f.readline()
                            # Lines without a space continue the current value.
                            while line:
                                if len(line.lstrip().split(' ')) <= 1:
                                    value_content = value_content + line.lstrip(
                                    ).split(' ')[0]
                                    line = f.readline()
                                else:
                                    break
                            w.write(key_title + ':' + value_content + '\n')
                            insert_sql = "insert into law_content (law_id, p_key, p_content, law_class) " \
                                         "value (%s, %s, %s, %s)"
                            try:
                                cursor.execute(
                                    insert_sql,
                                    (law_id, key_title, value_content, '风景名胜'))
                                conn.commit()
                                count = count + 1
                                print('\033[1;37;40m' + file_name +
                                      ': PARSE SUCCESS' + '\033[0m')
                            except Exception as e:
                                print(e)
                                conn.rollback()
                                print('\033[1;32;41m' + file_name +
                                      ': PARSE FAILED---------' + '\033[0m')

                        else:
                            # Space-free text before the first entry is skipped.
                            line = f.readline()
            else:
                line = f.readline()
    print('共插入:' + str(count) + '条')
Example #5
0
def one_two_article_parser(file_path):
    """Split a law text into sections headed by titles and store them.

    Headings are recognised by ``is_cotain_one_two_title`` (defined outside
    this function): a non-None result is used as the section key.
    NOTE(review): judging by the names, the headings are presumably
    "一、/二、"-style numbered titles — confirm against that helper.

    The file name is the last backslash-separated component of ``file_path``;
    its text before the first '.' is used as ``law.text_name`` to look up the
    law id. If that lookup raises, the error is printed and the function
    returns. Lines before the one starting with '【法规全文】' are skipped.
    Each section is appended to a file in the hard-coded output folder and
    inserted into ``law_content_parse`` (committed per row; failures are
    printed and rolled back).

    Args:
        file_path: Backslash-separated path of the text file; it is read as
            GBK with undecodable bytes ignored.
    """
    file_name = file_path.split('\\')[-1]
    write_path = "C:\\Users\\dhz1216\\Desktop\\washing\\第二类\\" + file_name

    # Look up this text's id in the law table; the law name is the file name
    # without its extension.
    law_name = file_name.split('.')[0]
    try:
        cursor = conn.cursor()
        select_sql = "select id from law where text_name = %s"
        # NOTE(review): (x) is a plain scalar, not a tuple — whether the
        # driver accepts a scalar parameter should be confirmed.
        cursor.execute(select_sql, (file_name.split('.')[0]))
        law_id = cursor.fetchone()[0]
    except Exception as e:
        # Also reached when no row matches (fetchone() returns None).
        print(e)
        return

    with open(file_path, "r", encoding='gbk', errors='ignore') as f:
        line = f.readline()
        while line:
            if line.startswith('【法规全文】'):
                # The marker line itself may already carry the first heading.
                line = line.replace('【法规全文】', '')
                with open(write_path, "a") as w:
                    # This loop consumes the rest of the file.
                    while line:
                        if is_cotain_one_two_title(line) is not None:
                            p_key = is_cotain_one_two_title(line)
                            # replace() removes every occurrence of the key
                            # in the line, not only the leading one.
                            p_content = line.lstrip().replace(p_key, '')
                            line = f.readline()
                            # Collect continuation lines until the next heading.
                            while line:
                                if is_cotain_one_two_title(line) is None:
                                    p_content = p_content + line
                                    line = f.readline()
                                else:
                                    break
                            w.write(p_key + ': ' + p_content + '\n')
                            insert_sql = "insert into law_content_parse (law_id, p_key, p_content, law_name) " \
                                         "value (%s, %s, %s, %s)"
                            try:
                                cursor.execute(insert_sql, (law_id, p_key, p_content, law_name))
                                conn.commit()
                                print(file_name + ': PARSE SUCCESS')
                            except Exception as e:
                                print(e)
                                conn.rollback()
                                print('\033[1;32;41m' + file_name + ': PARSE FAILED---------' + '\033[0m')
                        else:
                            # Text before the first heading is discarded.
                            line = f.readline()
            else:
                line = f.readline()
    pass
def explain_relation_process(law_id, sentence_parse_info, content):
    """Store "organisation is responsible for ..." relations of one sentence.

    For every verb, its labelled arguments are grouped by role name. A verb
    is used only when it has exactly one 'A0' argument and at least one 'A1'
    or 'C-A1' argument. The relation name is the verb followed by one
    argument text, the organisation is the 'A0' text with every '由' removed,
    and one row is inserted into ``response_to_explain`` per accepted verb.
    Failures are rolled back and printed; they are not re-raised.

    Args:
        law_id: Id of the law the sentence belongs to.
        sentence_parse_info: Mapping of verb -> list of ``(role, text)``
            pairs, e.g.
            ``[('A1', '本办法'), ('A0', '由省财政厅'), ('C-A1', '解释')]``.
        content: Source sentence, stored in ``from_sentence``.
    """
    cursor = conn.cursor()
    insert_sql = '''insert into response_to_explain (law_id, responsibility, relation, from_sentence) value (%s, %s, %s, %s)'''
    for verb, verb_role_list in dict(sentence_parse_info).items():
        # Group argument texts by role name, keeping their order.
        verb_role_dict = {}
        for role in verb_role_list:
            verb_role_dict.setdefault(role[0], []).append(role[1])

        a0_texts = verb_role_dict.get('A0', [])
        a1_texts = verb_role_dict.get('A1', [])
        c_a1_texts = verb_role_dict.get('C-A1', [])
        if not a1_texts and not c_a1_texts:
            continue
        if len(a0_texts) != 1:
            continue

        law_name = ''
        relation_name = verb
        orgnization = a0_texts[0]
        if len(a1_texts) == 2:
            # First A1 names the law, second A1 completes the relation.
            law_name = a1_texts[0]
            relation_name = relation_name + a1_texts[1]
        elif len(a1_texts) == 1 and c_a1_texts:
            law_name = a1_texts[0]
            relation_name = relation_name + c_a1_texts[0]
        elif len(a1_texts) == 1:
            # No explicit law mentioned: use the generic placeholder.
            law_name = '本条例 | 本办法'
            relation_name = relation_name + a1_texts[0]
        elif not a1_texts and c_a1_texts:
            law_name = '本条例 | 本办法'
            relation_name = relation_name + c_a1_texts[0]
        # Three or more A1 arguments fall through with the bare verb.

        print("【%s】(%s  ---%s-->  %s)" %
              (str(law_id), law_name, relation_name, orgnization))
        orgnization = str(orgnization).replace('由', '')
        try:
            cursor.execute(insert_sql,
                           (law_id, orgnization, relation_name, content))
            conn.commit()
        except Exception as e:
            conn.rollback()
            # Report the actual error, and convert law_id: a non-string id
            # would raise TypeError inside this handler.
            print(e)
            print('\033[1;32;41m' + str(law_id) +
                  ': ------------PARSE FAILED---------' + '\033[0m')
Example #7
0
def clause_strip():  # Strip surrounding whitespace from stored clauses
    """Trim leading/trailing whitespace of every stored clause.

    Rows of ``article_1_clause`` and then ``article_2_clause`` are read with
    ``select *``; column 0 is used as the id and column 3 as the clause
    text. The stripped text is written back to ``clause_content`` and
    committed row by row. Failures are rolled back and printed. Three
    separator lines are printed between the two tables.
    """
    separator = '========================================================================================================='
    tables = ('article_1_clause', 'article_2_clause')
    cursor = conn.cursor()

    # Read both tables before changing anything, as the original did.
    # Table names come from the fixed tuple above, never from input.
    clauses_by_table = []
    for table in tables:
        cursor.execute('select * from ' + table)
        clauses_by_table.append(cursor.fetchall())

    for position, (table, clauses) in enumerate(zip(tables, clauses_by_table)):
        if position:
            for _ in range(3):
                print(separator)
        update_sql = 'update ' + table + ' set clause_content = %s where id = %s'
        for clause in clauses:
            clause_id = clause[0]
            if clause[3] is None:
                # str(None) would overwrite a NULL with the text 'None'.
                continue
            clause_content = str(clause[3]).strip()
            try:
                cursor.execute(update_sql, (clause_content, clause_id))
                conn.commit()
                print(
                    str(clause_id) + clause_content +
                    '------------------------------------SUCCESS')
            except Exception as e:
                conn.rollback()
                # str(e): concatenating the exception object raises TypeError.
                print('\033[1;32;41m' + str(clause_id) + '--' + str(e) +
                      ': FAILED---------' + '\033[0m')
def key_words_extract():  # Extract keywords with two jieba methods and store them in law.key_words
    """Compute keywords for every text file and store them on its law row.

    For each file in the source folder whose base name (text before the
    first '.') equals a ``law.text_name``, the top 3 TextRank keywords and
    the top 5 ``extract_tags`` keywords are computed. Their intersection is
    stored, or their union when the intersection is empty, as a
    comma-separated string in ``law.key_words``. Failures are rolled back
    and printed.
    """
    dir_path = "C:\\Users\\dhz1216\\Desktop\\wenben\\"
    select_sql = "select id from law where text_name = %s"
    update_sql = "update law set key_words = %s where id = %s"
    allow_pos = ('n', 'ns', 'vn', 'v', 'nz')
    cursor = conn.cursor()
    for file in os.listdir(dir_path):
        text_name = file.split('.')[0]
        # Look the law up first so unmatched files are never read, and pass
        # the parameter as a real one-element tuple.
        cursor.execute(select_sql, (text_name,))
        law = cursor.fetchone()
        if not law:
            continue
        law_id = law[0]

        # The file is closed again before any analysis or database work.
        with open(dir_path + file, "r", encoding='gbk', errors='ignore') as f:
            text = f.read()

        key_words_textrank = set(
            analyse.textrank(text, topK=3, withWeight=False,
                             allowPOS=allow_pos))
        key_words_tfidf = set(
            analyse.extract_tags(text, topK=5, withWeight=False,
                                 allowPOS=allow_pos))
        # Prefer words both methods agree on; otherwise keep all of them.
        key_words_list = list(key_words_textrank & key_words_tfidf)
        if not key_words_list:
            key_words_list = list(key_words_textrank | key_words_tfidf)
        key_words = ','.join(key_words_list)

        try:
            cursor.execute(update_sql, (key_words, law_id))
            conn.commit()
            print(text_name + '--------UPDATE SUCCESS')
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception object raises TypeError.
            print('\033[1;32;41m' + text_name + ': PARSE FAILED---------' +
                  str(e) + '\033[0m')
Example #9
0
def violate_punishment_save(is_contain_accord, violate_punishment_accord_info,
                            law_id):
    """Insert one violation/punishment record into ``violate_punishment_accord``.

    When ``is_contain_accord`` equals 1 the record also carries the
    'accord_info' columns; otherwise only the violation and punishment
    columns are written. Each info entry is indexed 0..3: the first three
    values fill the chapter/article/sentence id columns and the fourth the
    content column. ``law_id`` is used for every ``*_law_id`` column. Errors
    during the insert are rolled back and printed, not re-raised.

    Args:
        is_contain_accord: 1 when accord information is present.
        violate_punishment_accord_info: Mapping with 'violate_info' and
            'punishment_info' (plus 'accord_info' when applicable).
        law_id: Id of the law the record belongs to.
    """
    db_cursor = conn.cursor()
    has_accord = is_contain_accord == 1
    if not has_accord:
        statement = '''insert into violate_punishment_accord 
                                    (violate_law_id, violate_chapter_id, violate_article_id, violate_sentence_id, 
                                     punishment_law_id, punishment_chapter_id, punishment_article_id, punishment_sentence_id,
                                     violate_content, punishment_content, is_contain_accord)
                                     value (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''
    else:
        statement = '''insert into violate_punishment_accord 
                            (violate_law_id, violate_chapter_id, violate_article_id, violate_sentence_id, 
                             punishment_law_id, punishment_chapter_id, punishment_article_id, punishment_sentence_id,
                             accord_law_id, accord_chapter_id, accord_article_id, accord_sentence_id, 
                             violate_content, punishment_content, accord_content, is_contain_accord)
                             value (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''

    violation = violate_punishment_accord_info['violate_info']
    penalty = violate_punishment_accord_info['punishment_info']
    if has_accord:
        basis = violate_punishment_accord_info['accord_info']

    try:
        # Indexing stays inside the try so a short info sequence is reported
        # through the same handler as a database error.
        id_columns = (law_id, violation[0], violation[1], violation[2],
                      law_id, penalty[0], penalty[1], penalty[2])
        if has_accord:
            values = id_columns + (law_id, basis[0], basis[1], basis[2],
                                   violation[3], penalty[3], basis[3],
                                   int(is_contain_accord))
        else:
            values = id_columns + (violation[3], penalty[3],
                                   int(is_contain_accord))
        db_cursor.execute(statement, values)
        conn.commit()
    except Exception as err:
        conn.rollback()
        print(err)
Example #10
0
def article_2_key_process():  # Normalise keys of chapter-less articles to the form "第XX条"
    """Rewrite ``article_2.a_key`` values that lack '条' as '第…条'.

    Every key not containing '条' has its '、' characters removed and is
    wrapped as '第' + key + '条'. Updates are committed row by row; failures
    are rolled back and printed. Rows whose key is NULL are skipped.
    """
    select_sql = "select id, a_key from article_2"
    update_sql = "update article_2 set a_key = %s where id = %s"
    cursor = conn.cursor()
    cursor.execute(select_sql)
    for article in cursor.fetchall():
        article_id, raw_key = article[0], article[1]
        if raw_key is None:
            # A NULL key used to abort the whole run with TypeError.
            continue
        key_text = str(raw_key)
        if '条' in key_text:
            continue
        article_key = '第' + key_text.replace('、', '') + '条'
        try:
            cursor.execute(update_sql, (article_key, article_id))
            conn.commit()
            print(
                str(article_id) + article_key +
                '--------------------UPDATE SUCCESS')
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception object raises TypeError.
            print('\033[1;32;41m' + str(article_id) + article_key + str(e) +
                  ': ARTICLE FAILED---------' + '\033[0m')
Example #11
0
def class_one_sentences_extracte():  # Extract sentences from "class one" texts
    """Copy parsed sections into ``sentences``, one row per sentence.

    Only ``law_content_parse`` rows whose ``p_key`` starts with a
    "第…章" / "第…条" heading are used. Content containing ':' is stored
    whole with ``is_single`` 0; other content is split on newlines and every
    non-empty line is stored with ``is_single`` 1. Inserts are committed one
    by one; failures are printed and rolled back. Rows with a NULL key or
    content are skipped.
    """
    pattern = re.compile("第(.*?)(?:章|条)")  # heading test for "class one" sections
    select_sql = "select id, law_id, p_key, p_content from law_content_parse"
    insert_sentence_sql = "insert into sentences (law_id, title_id, sentence, is_single) " \
                          "value (%s, %s, %s, %s)"
    cursor = conn.cursor()
    cursor.execute(select_sql)

    for res in cursor.fetchall():
        # title_id is the law_content_parse primary key.
        title_id, law_id, p_key, p_content = res[0], res[1], res[2], res[3]
        if p_key is None or p_content is None:
            # A NULL key made pattern.match raise TypeError; NULL content
            # was stored as the text 'None'.
            continue
        if not pattern.match(p_key):
            continue

        if ':' in str(p_content):
            pending = [(p_content, 0)]
        else:
            pending = [(part, 1) for part in str(p_content).split('\n') if part]

        for sentence, is_single in pending:
            try:
                cursor.execute(insert_sentence_sql,
                               (law_id, title_id, sentence, is_single))
                conn.commit()
                print(str(sentence) + '-----------Success')
            except Exception as e:
                print('\033[1;32;41m' + str(sentence) + ': FAILED---------' +
                      '\033[0m')
                conn.rollback()
                print(e)
Example #12
0
def location_extract():  # Detect each law's region (province/city) via pyltp segmentation and POS tagging
    """Derive ``law.location`` from each law's name.

    A city name from ``city`` that does not end in '市' and occurs in the
    law name is used first. Otherwise the name is segmented and tagged, and
    the first run of up to three consecutive 'ns' words is joined. When the
    result is empty or a single character, '中华人民共和国' is stored
    instead. Updates are committed row by row; failures are rolled back and
    printed.
    """
    select_sql = "select id, name from law"
    update_sql = "update law set location = %s where id = %s"
    select_special_city_sql = "select name from city where name not like '%市'"
    cursor = conn.cursor()
    cursor.execute(select_sql)
    results = cursor.fetchall()
    cursor.execute(select_special_city_sql)
    special_city_list = [row[0] for row in cursor.fetchall()]

    for result in results:
        title = result[1]
        location = ''
        for city in special_city_list:
            if city in title:
                location = city
                break

        if not location:
            words = list(segmentor.segment(title))
            postag = list(postagger.postag(words))
            for index, tag in enumerate(postag):
                if tag != 'ns':
                    continue
                # Extend over at most three consecutive 'ns' words without
                # reading past the end: the original indexed index + 1 and
                # index + 2 unconditionally and raised IndexError when the
                # place name ended the title.
                end = index + 1
                limit = min(index + 3, len(postag))
                while end < limit and postag[end] == 'ns':
                    end += 1
                location = ''.join(words[index:end])
                break

        if not location or len(location) <= 1:
            location = '中华人民共和国'
        try:
            cursor.execute(update_sql, (location, result[0]))
            conn.commit()
            print(str(result[0]) + result[1] + '-----------SUCCESS')
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception object raises TypeError.
            print('\033[1;32;41m' + str(result[0]) + result[1] + str(e) +
                  '\033[0m')
Example #13
0
def law_to_class_process():  # Fill law.class_id with the id of the law's category
    """Set ``law.class_id`` from the ``law_class`` row matching ``law.type``.

    Updates are committed row by row; failures are rolled back and printed.
    Laws whose type has no ``law_class`` row are reported and skipped.
    """
    law_select = "select id, type, name from law"
    class_select_sql = "select id from law_class where type = %s"
    update_sql = "update law set class_id = %s where id = %s"
    cursor = conn.cursor()
    cursor.execute(law_select)
    for law in cursor.fetchall():
        law_id, law_type, law_name = law[0], law[1], law[2]
        # Parameters are passed as a real one-element tuple.
        cursor.execute(class_select_sql, (law_type,))
        class_info = cursor.fetchone()
        if class_info is None:
            # Previously class_info[0] raised TypeError here and aborted
            # the whole batch.
            print('\033[1;32;41m' + str(law_name) +
                  ': PARSE FAILED---------no law_class row for type ' +
                  str(law_type) + '\033[0m')
            continue
        class_id = class_info[0]
        try:
            cursor.execute(update_sql, (class_id, law_id))
            conn.commit()
            print(law_name + '-----------------SUCCESS')
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception object raises TypeError.
            print('\033[1;32;41m' + law_name + ': PARSE FAILED---------' +
                  str(e) + '\033[0m')
def update_article():
    """Append ':' to the content of every non-single sentence.

    Processes ``article_1_sentence`` and then ``article_2_sentence`` rows
    with ``is_single = 0``; column 0 is used as the id and column 3 as the
    content. Updates are committed row by row; failures are rolled back and
    printed. A separator line is printed between the two tables.

    Note:
        Running this twice appends a second ':' to the same rows.
    """
    cursor = conn.cursor()
    # (label used in the log output, table name) — fixed values only.
    targets = (('-1-', 'article_1_sentence'), ('-2-', 'article_2_sentence'))
    for position, (label, table) in enumerate(targets):
        if position:
            print(
                '\n',
                '=============================================================================================',
                '\n')
        cursor.execute('select * from ' + table + ' where is_single = 0')
        update_sql = 'update ' + table + ' set content = %s where id = %s'
        for sentence in cursor.fetchall():
            sentence_id = sentence[0]
            content = sentence[3] + ':'
            try:
                cursor.execute(update_sql, (content, sentence_id))
                conn.commit()
                print(str(sentence_id), label, content,
                      '------------------SUCCESS')
            except Exception as e:
                conn.rollback()
                # The label now names the table actually being processed;
                # article_1 failures used to be reported as '-2-'.
                print('\033[1;32;41m', str(sentence_id), label, e,
                      '-------FAILED', '\033[0m')
Example #15
0
def save_relation(relation_list, law_id, content_class, chapter_id,
                  sentence_id):
    """Insert extracted (subject, relation, object) triples.

    Each relation is indexed 0..2 for subject, relation name and object and
    stored in ``extract_relation`` together with the given ids.
    ``is_contain`` is 1 when the object equals the placeholder
    '根据章节条款信息补全list', else 0. Inserts are committed one by one;
    failures are rolled back and printed.

    Args:
        relation_list: Iterable of indexable triples.
        law_id: Stored in ``law_id``.
        content_class: Stored in ``class``.
        chapter_id: Stored in ``chapter_id``.
        sentence_id: Stored in ``sentence_id``.
    """
    cursor = conn.cursor()
    insert_sql = '''insert into extract_relation 
                    (law_id, class, chapter_id, sentence_id, is_contain, subject, relation, object)
                    value (%s, %s, %s, %s, %s, %s, %s, %s)'''
    for relation in relation_list:
        subject = relation[0]
        relation_name = relation[1]
        target = relation[2]  # renamed from 'object' to stop shadowing the builtin
        is_contain = 1 if target == '根据章节条款信息补全list' else 0
        try:
            cursor.execute(insert_sql,
                           (law_id, content_class, chapter_id, sentence_id,
                            is_contain, subject, relation_name, target))
            conn.commit()
            print(subject, relation_name, target, '--------saved--------')
        except Exception as e:
            conn.rollback()
            # Both the relation (a sequence) and the exception need str():
            # concatenating them directly raises TypeError in the handler.
            print('\033[1;32;41m' + str(relation) + str(e) +
                  ': FAILED---------' + '\033[0m')
Example #16
0
def article_1_sentence_extract():  # Split article_1 contents into sentences and clauses
    """Populate ``article_1_sentence`` / ``article_1_clause`` from ``article_1``.

    Column 0 of each ``article_1`` row is used as the id and column 2 as the
    content. Content without ':' is stored as one sentence with
    ``is_single`` 1. Otherwise the text before the first ':' (spaces
    removed) is stored as a sentence with ``is_single`` 0, and every
    non-empty line after that ':' (spaces removed) is stored as a clause
    linked to that sentence. Inserts are committed one by one; failures are
    rolled back and printed.
    """
    select_sql = "select * from article_1"
    insert_article_1_sentence_sql = '''insert into article_1_sentence (article_1_id, is_single, content) value (%s, %s, %s)'''
    insert_article_1_clause_sql = '''insert into article_1_clause (article_1_id, article_1_sentence_id, clause_content) value (%s, %s, %s)'''
    select_article1_sentence_id = '''SELECT id from article_1_sentence where id = (SELECT max(id) FROM article_1_sentence);'''
    cursor = conn.cursor()
    cursor.execute(select_sql)
    for article in cursor.fetchall():
        article_1_id = article[0]
        article_1_content = article[2]

        if ':' not in article_1_content:
            try:
                cursor.execute(insert_article_1_sentence_sql,
                               (article_1_id, 1, article_1_content))
                conn.commit()
                print(article_1_content +
                      '=========================================SUCCESS')
            except Exception as e:
                conn.rollback()
                # str(e): concatenating the exception object raises TypeError.
                print('\033[1;32;41m' + str(article_1_id) + '--' + str(e) +
                      ': FAILED---------' + '\033[0m')
            continue

        # Split on the first ':' only, so clause text that itself contains
        # ':' is kept instead of being cut at the second colon.
        lead_text, _, clause_text = str(article_1_content).partition(':')
        article_1_sentence_content = lead_text.replace(" ", "")
        try:
            cursor.execute(
                insert_article_1_sentence_sql,
                (article_1_id, 0, article_1_sentence_content))
            conn.commit()
        except Exception as e:
            conn.rollback()
            print('\033[1;32;41m' + str(article_1_id) +
                  article_1_sentence_content + str(e) + ': FAILED---------' +
                  '\033[0m')
            # Without the sentence row, max(id) below would belong to some
            # other sentence and the clauses would be linked to it.
            continue

        # The sentence just inserted is taken to be the row with the highest
        # id; this assumes no other writer inserts concurrently.
        cursor.execute(select_article1_sentence_id)
        sentence_id = cursor.fetchone()[0]
        for article_1_clause in clause_text.split("\n"):
            if not article_1_clause:
                continue
            try:
                cursor.execute(
                    insert_article_1_clause_sql,
                    (article_1_id, sentence_id,
                     article_1_clause.replace(" ", "")))
                conn.commit()
            except Exception as e:
                conn.rollback()
                print('\033[1;32;41m' + str(article_1_id) +
                      article_1_clause + str(e) + ': FAILED---------' +
                      '\033[0m')
        print(article_1_content +
              '============================================SUCCESS')
Example #17
0
def law_province_code_update():  # Derive each law's province code and store it in law.province_code
    """Fill ``law.province_code`` from ``law.location_code``.

    The province code is the first two characters of the location code
    followed by '0000'; laws without a location code get '000000'. Updates
    are committed row by row; failures are rolled back and printed.
    """
    select_sql = "select id, name, location, location_code from law"
    update_sql = "update law set province_code = %s where id = %s"
    cursor = conn.cursor()
    cursor.execute(select_sql)
    for law in cursor.fetchall():
        law_id, law_name, law_location_code = law[0], law[1], law[3]
        if law_location_code is not None:
            province_code = str(law_location_code)[0:2] + '0000'
        else:
            province_code = '000000'

        try:
            cursor.execute(update_sql, (province_code, law_id))
            conn.commit()
            print(law_name + '-------------------SUCCESS')
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception object raises TypeError.
            print('\033[1;32;41m' + law_name + ': FAILED---------' + str(e) +
                  '\033[0m')
def complex_main_sentence_analysis():
    """Parse the main clause of every non-single sentence and store the results.

    For each row of ``sentences`` with ``is_single = 0`` the text of column 3
    up to the first ``:`` (with a ``:`` re-appended) is segmented, POS-tagged,
    dependency-parsed and semantic-role-labelled.  The following is written:

    * ``role_label`` - one row per argument of every labelled role;
    * ``verb`` - one row per token tagged ``'v'``; ``is_core`` is 1 when the
      token index is the index of a labelled role;
    * ``verb.is_head`` - set to 1 for the token whose arc relation is ``'HED'``;
    * ``words`` - one row per dependency arc that has a core verb on either
      end, describing the token on the other end.

    Every statement is committed on its own; a failing statement is rolled
    back and reported, and processing continues.

    Uses the module-level ``conn``, ``segmentor``, ``postagger``, ``parser``
    and ``labeller`` objects.
    """
    start_time = time.time()

    # SQL statements are loop-invariant, so they are defined once up front.
    select_sql = '''select * from sentences where is_single = 0'''
    insert_role_label_sql = '''insert into role_label (sentence_id, arg_name, arg_start, arg_end, core_verb_index) value (%s, %s, %s, %s, %s)'''
    insert_verb_sql = '''insert into verb (sentence_id, part_of_speech, loc_index, is_core) value (%s, %s, %s, %s)'''
    update_verb_sql = '''update verb set is_head = 1 where sentence_id = %s and loc_index = %s'''
    insert_words_sql = '''insert into words (sentence_id, part_of_speech, core_verb_index, relation, word, head_or_tail) 
                                  value (%s, %s, %s, %s, %s, %s)'''

    cursor = conn.cursor()
    cursor.execute(select_sql)
    complex_sentences = cursor.fetchall()
    for sentence in complex_sentences:
        sentence_id = sentence[0]
        main_sentence = str(sentence[3]).strip().split(':')[0] + ':'
        origin_words = list(segmentor.segment(main_sentence))  # word segmentation
        origin_postags = list(postagger.postag(origin_words))  # POS tagging
        arcs = parser.parse(origin_words, origin_postags)  # dependency parsing
        roles = labeller.label(origin_words, origin_postags, arcs)  # semantic role labelling

        # Extract the semantic-role information and store it.
        print('语义角色标注--------', str(len(roles)))
        core_verb_list = list()
        for role in roles:
            core_verb_list.append(role.index)  # collect the core-verb indices
            for arg in role.arguments:
                arg_name = arg.name
                arg_start = arg.range.start
                arg_end = arg.range.end
                # Insert the role argument into the role_label table.
                try:
                    cursor.execute(insert_role_label_sql,
                                   (sentence_id, arg_name, arg_start, arg_end,
                                    role.index))
                    conn.commit()
                    print(str(sentence_id), main_sentence, '-----------',
                          origin_words[role.index], arg_name,
                          '-------------SUCCESS')
                except Exception as e:
                    conn.rollback()
                    # str(e): 'str' + Exception would raise TypeError here.
                    print('\033[1;32;41m' + str(sentence_id) + main_sentence +
                          str(e) + ': ---------FAILED---------' + '\033[0m')

        # Extract the verbs and store them.
        print('提取动词信息-------------------------------------------')
        for index, (word, postag) in enumerate(
                zip(origin_words, origin_postags)):
            if postag != 'v':
                continue
            is_core = 1 if index in core_verb_list else 0
            try:
                cursor.execute(insert_verb_sql,
                               (sentence_id, 'v', index, is_core))
                conn.commit()
                print(str(index), '--', word, '-----------------SUCCESS')
            except Exception as e:
                conn.rollback()
                print('\033[1;32;41m' + str(index) + word + str(e) +
                      ': ---------FAILED---------' + '\033[0m')

        # Extract the relations between core verbs and the other tokens.
        arc_head = [a.head for a in arcs]
        arc_relation = [a.relation for a in arcs]
        # Both lists get a placeholder at position 0 so that they can be
        # indexed directly with an arc's head value `j` (head_index = j - 1
        # is the corresponding position in origin_words).
        tree_node_list = ['ROOT'] + origin_words
        postags = ['NONE'] + origin_postags
        for i, (j, relation) in enumerate(zip(arc_head, arc_relation)):
            head_index = j - 1
            tail_index = i

            if relation == 'HED':
                # Token i is the head of the sentence: flag it in `verb`.
                print('更新根动词情况:')
                try:
                    cursor.execute(update_verb_sql, (sentence_id, i))
                    conn.commit()
                    print('根动词-----index: ', str(i), '----', origin_words[i],
                          '-----UPDATE SUCCESS')
                except Exception as e:
                    conn.rollback()
                    print('\033[1;32;41m' + '根动词---' + str(i) +
                          origin_words[i] + str(e) +
                          ': ---------FAILED---------' + '\033[0m')
                continue

            if head_index not in core_verb_list and tail_index not in core_verb_list:
                # Neither end of the arc is a core verb: nothing to record.
                continue
            elif head_index in core_verb_list:
                # The core verb is the head; record the dependent token i.
                part_of_speech = postags[i + 1]
                core_verb_index = head_index
                word = origin_words[i]
                loc = 'tail'
            else:
                # Token i is the core verb; record its head token.
                part_of_speech = postags[j]
                core_verb_index = tail_index
                word = tree_node_list[j]
                loc = 'head'
            # TODO: database insert operation
            try:
                cursor.execute(insert_words_sql,
                               (sentence_id, part_of_speech, core_verb_index,
                                relation, word, loc))
                conn.commit()
                print(tree_node_list[j], postags[j], '----', origin_words[i],
                      postags[i + 1], relation, '-----SUCCESS')
            except Exception as e:
                conn.rollback()
                print('\033[1;32;41m', tree_node_list[j], postags[j], '----',
                      origin_words[i], postags[i + 1], relation, '-----FAILED',
                      e, '\033[0m')

        print(
            '\n',
            '===============================================================',
            '\n')

    end_time = time.time()
    print('处理', str(len(complex_sentences)), '条数据的总耗时为:',
          str(end_time - start_time), 's')
Example #19
0
def chapter_article_process():
    """Approach 1 (known to be flawed): split parsed law content into tables.

    Walks all rows of ``law_content_parse`` in the order the query returns
    them.  A row whose column 2 starts with a chapter heading (``第…章``) is
    inserted into ``chapter``; the rows directly after it whose column 2
    starts with an article heading (``第…条``) are inserted into ``article_1``
    and linked to that chapter.  Every other row is inserted into
    ``article_2`` with its law id (column 1).

    The article scan after a chapter does not check whether the rows still
    belong to the same law (see the TODO below).

    Each insert is committed on its own.  On the first failing insert the
    statement is rolled back, the error is printed and the function returns,
    leaving the remaining rows unprocessed.

    Uses the module-level ``conn`` database connection.
    """
    select_sql = "select * from law_content_parse"
    insert_chapter_sql = "insert into chapter (chapter_key, chapter_name, law_id) value (%s, %s, %s)"
    select_chapter_sql = 'SELECT id from chapter where id = (SELECT max(id) FROM chapter);'
    insert_article1_sql = "insert into article_1 (a_key, a_content, chapter_id) value (%s, %s, %s)"
    insert_article2_sql = "insert into article_2 (a_key, a_content, law_id) value (%s, %s, %s)"
    # Compiled once instead of on every loop iteration.
    pattern_chapter = re.compile("第(.*?)章")
    pattern_article = re.compile("第(.*?)条")

    cursor = conn.cursor()
    cursor.execute(select_sql)
    contents = cursor.fetchall()

    index = 0
    while index < len(contents):
        row = contents[index]
        # Column 5 is only used to label the log output.
        label = str(row[5])
        if pattern_chapter.match(row[2]):
            # Insert the chapter row into `chapter`.
            try:
                cursor.execute(insert_chapter_sql, (row[2], row[3], row[1]))
                conn.commit()
                print(label + '----' + row[2] + '----------------SUCCESS')
            except Exception as e:
                conn.rollback()
                # str(e): 'str' + Exception would raise TypeError here.
                print('\033[1;32;41m' + label + str(e) +
                      ': CHAPTER FAILED---------' + '\033[0m')
                return

            # The chapter just inserted has the highest id; it cannot change
            # while its articles are inserted, so look it up only once.
            cursor.execute(select_chapter_sql)
            chapter_id = cursor.fetchone()[0]

            index = index + 1
            # TODO: add a check for whether the next law has already been reached
            while index < len(contents):
                row = contents[index]
                label = str(row[5])
                if not pattern_article.match(row[2]):
                    print('-----------------------------' + label +
                          '----------------------------------')
                    break
                # Insert the article row into `article_1`.
                try:
                    cursor.execute(insert_article1_sql,
                                   (row[2], row[3], chapter_id))
                    conn.commit()
                    print(label + '----' + row[2] +
                          '----------------SUCCESS')
                except Exception as e:
                    conn.rollback()
                    print('\033[1;32;41m' + label + str(e) +
                          ': ARTICLE FAILED---------' + '\033[0m')
                    return
                index = index + 1
        else:
            try:
                cursor.execute(insert_article2_sql, (row[2], row[3], row[1]))
                conn.commit()
                print(label + '----' + row[2] + '----------------SUCCESS')
            except Exception as e:
                conn.rollback()
                print('\033[1;32;41m' + label + str(e) +
                      ': ARTICLE FAILED---------' + '\033[0m')
                return
            index = index + 1
Example #20
0
def chapter_article_process_2():
    """Approach 2: split parsed law content into tables, one law at a time.

    Collects the distinct ``law_id`` values of ``law_content_parse`` and
    processes the rows of each law separately, in the order the query returns
    them.  A row whose column 2 starts with a chapter heading (``第…章``) is
    inserted into ``chapter``; the rows directly after it whose column 2
    starts with an article heading (``第…条``) are inserted into ``article_1``
    and linked to that chapter.  Every other row is inserted into
    ``article_2`` with the law id.

    Because the rows are fetched per law, the article scan after a chapter
    never runs into the rows of another law.

    Each insert is committed on its own.  On the first failing insert the
    statement is rolled back, the error is printed and the function returns,
    leaving the remaining rows and laws unprocessed.

    Uses the module-level ``conn`` database connection.
    """
    cursor = conn.cursor()
    pattern_chapter = re.compile("第(.*?)章")
    pattern_article = re.compile("第(.*?)条")
    select_law_id_sql = "select law_id from law_content_parse group by law_id"
    select_law_content_sql = "select * from law_content_parse where law_id = %s"
    insert_chapter_sql = "insert into chapter (chapter_key, chapter_name, law_id) value (%s, %s, %s)"
    select_chapter_sql = 'SELECT id from chapter where id = (SELECT max(id) FROM chapter);'
    insert_article1_sql = "insert into article_1 (a_key, a_content, chapter_id) value (%s, %s, %s)"
    insert_article2_sql = "insert into article_2 (a_key, a_content, law_id) value (%s, %s, %s)"

    # First collect the distinct law ids.
    cursor.execute(select_law_id_sql)
    law_id_list = [law[0] for law in cursor.fetchall()]

    # Then fetch and process law_content_parse one law at a time.
    for law_id in law_id_list:
        cursor.execute(select_law_content_sql, (law_id, ))
        contents = cursor.fetchall()

        index = 0
        while index < len(contents):
            row = contents[index]
            # Column 5 is only used to label the log output.
            label = str(row[5])
            if pattern_chapter.match(row[2]):
                # Insert the chapter row into `chapter`.
                try:
                    cursor.execute(insert_chapter_sql,
                                   (row[2], row[3], law_id))
                    conn.commit()
                    print(label + '----' + row[2] +
                          '----------------SUCCESS')
                except Exception as e:
                    conn.rollback()
                    # str(e): 'str' + Exception would raise TypeError here.
                    print('\033[1;32;41m' + label + str(e) +
                          ': CHAPTER FAILED---------' + '\033[0m')
                    return

                # The chapter just inserted has the highest id; it cannot
                # change while its articles are inserted, so look it up once.
                cursor.execute(select_chapter_sql)
                chapter_id = cursor.fetchone()[0]

                index = index + 1
                while index < len(contents):
                    row = contents[index]
                    label = str(row[5])
                    if not pattern_article.match(row[2]):
                        print('-----------------------------' + label +
                              '----------------------------------')
                        break
                    # Insert the article row into `article_1`.
                    try:
                        cursor.execute(insert_article1_sql,
                                       (row[2], row[3], chapter_id))
                        conn.commit()
                        print(label + '----' + row[2] +
                              '----------------SUCCESS')
                    except Exception as e:
                        conn.rollback()
                        print('\033[1;32;41m' + label + str(e) +
                              ': ARTICLE FAILED---------' + '\033[0m')
                        return
                    index = index + 1
            else:
                try:
                    cursor.execute(insert_article2_sql,
                                   (row[2], row[3], law_id))
                    conn.commit()
                    print(label + '----' + row[2] +
                          '----------------SUCCESS')
                except Exception as e:
                    conn.rollback()
                    print('\033[1;32;41m' + label + str(e) +
                          ': ARTICLE FAILED---------' + '\033[0m')
                    return
                index = index + 1