Ejemplo n.º 1
0
def violate_punishment_save(is_contain_accord, violate_punishment_accord_info,
                            law_id):
    """Insert one violation/punishment (and optionally accord) record.

    A single row is written to ``violate_punishment_accord`` through the
    module-level ``conn``.  Errors raised while building the parameters or
    executing the statement are printed and the transaction is rolled back;
    they are not re-raised.

    Args:
        is_contain_accord: ``1`` when the ``accord_*`` columns should be
            filled from ``'accord_info'``; any other value stores only the
            violation and punishment parts.  ``int(is_contain_accord)`` is
            written to the ``is_contain_accord`` column.
        violate_punishment_accord_info: mapping with the keys
            ``'violate_info'`` and ``'punishment_info'`` (plus
            ``'accord_info'`` when ``is_contain_accord == 1``).  Each value
            is indexed as ``[chapter_id, article_id, sentence_id, content]``.
        law_id: value stored in every ``*_law_id`` column of the row.

    Raises:
        KeyError: if a required ``*_info`` key is missing from the mapping.
    """
    cursor = conn.cursor()

    # The accord part is the only difference between the two row layouts.
    roles = ['violate', 'punishment']
    if is_contain_accord == 1:
        roles.append('accord')
    infos = [violate_punishment_accord_info[role + '_info'] for role in roles]

    # Column order: all id quadruples first, then all contents, then the flag.
    id_suffixes = ('_law_id', '_chapter_id', '_article_id', '_sentence_id')
    columns = [role + suffix for role in roles for suffix in id_suffixes]
    columns.extend(role + '_content' for role in roles)
    columns.append('is_contain_accord')
    insert_sql = 'insert into violate_punishment_accord ({}) value ({})'.format(
        ', '.join(columns), ', '.join(['%s'] * len(columns)))

    try:
        # Built inside the try block so that malformed info entries are
        # reported and rolled back like any other failure.
        params = []
        for info in infos:
            params.extend((law_id, info[0], info[1], info[2]))
        params.extend(info[3] for info in infos)
        params.append(int(is_contain_accord))
        cursor.execute(insert_sql, tuple(params))
        conn.commit()
    except Exception as e:
        conn.rollback()
        print(e)
    finally:
        # Release the cursor on both the success and the failure path.
        cursor.close()
Ejemplo n.º 2
0
def law_info_segment():
    """Replace empty ``law`` column values with the placeholder '未知'.

    Every row of ``law`` is read; for each column name in ``LAW_PROPERTY``
    (which maps the name to its position in a ``select *`` row) a value that
    is ``None`` or the empty string is overwritten with '未知'.  Each update
    is committed on its own and echoed to stdout.
    """
    cursor = conn.cursor()
    cursor.execute('''select * from law''')
    rows = cursor.fetchall()
    names = list(LAW_PROPERTY)
    for row in rows:
        law_id = row[0]
        for name in names:
            current = row[LAW_PROPERTY[name]]
            if current is not None and current != '':
                continue
            placeholder = '未知'
            # The column name is interpolated; the values stay parameterised.
            statement = 'update law set %s = %%s where id = %%s' % name
            cursor.execute(statement, (placeholder, law_id))
            conn.commit()
            print(law_id, name, current, placeholder)
Ejemplo n.º 3
0
def insert_new_relation_base_type(relation_type, data):
    """Insert one relation record into the ``new_<relation_type>_relation`` table.

    Args:
        relation_type: string used both to build the table name and as the
            value of the ``relation_type`` column.
        data: mapping providing ``law_id``, ``chapter_id``, ``sentence_id``,
            ``parse_sentence``, ``subject``, ``relation`` and ``object``.

    The insert is committed immediately; database errors propagate.
    """
    cursor = conn.cursor()
    keys = ('law_id', 'chapter_id', 'sentence_id', 'parse_sentence',
            'subject', 'relation', 'object')
    target = 'new_' + relation_type + '_relation'
    # Only the table name is interpolated; all values go through placeholders.
    statement = ''.join([
        "insert into %s" % target,
        "(law_id, chapter_id, sentence_id, parse_sentence, subject, relation, object, relation_type) ",
        "value (%s, %s, %s, %s, %s, %s, %s, %s)",
    ])
    values = tuple(data[key] for key in keys) + (relation_type,)

    cursor.execute(statement, values)
    conn.commit()
    print(relation_type, 'insert success!')
Ejemplo n.º 4
0
def article_2_key_process():
    """Normalise ``article_2.a_key`` values to the form '第XX条'.

    ``article_2`` holds the articles of laws that have no chapter level.  Any
    key that does not already contain '条' has its '、' characters removed and
    is wrapped as '第' + key + '条'.  Each update is committed on its own; a
    failing update is rolled back, reported on stdout and the loop continues
    with the next article.
    """
    select_sql = "select id, a_key from article_2"
    update_sql = "update article_2 set a_key = %s where id = %s"
    cursor = conn.cursor()
    cursor.execute(select_sql)
    for article in cursor.fetchall():
        article_id, raw_key = article[0], article[1]
        if '条' in raw_key:
            continue
        article_key = '第' + str(raw_key).replace('、', '') + '条'
        try:
            cursor.execute(update_sql, (article_key, article_id))
            conn.commit()
            print(
                str(article_id) + article_key +
                '--------------------UPDATE SUCCESS')
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception object itself raised a
            # TypeError inside the handler and hid the real database error.
            print('\033[1;32;41m' + str(article_id) + article_key + str(e) +
                  ': ARTICLE FAILED---------' + '\033[0m')
def update_forestry_subject():
    """Add subjects found in ``forbid_1`` that ``forestry_subject`` lacks.

    The existing subjects are read from the second column of
    ``forestry_subject`` rows; every distinct ``forbid_1.forbid_subject`` not
    among them is inserted, committed and printed.  Uses the module-level
    ``CURSOR`` and ``conn``.
    """
    query_forestry_subject = '''select * from forestry_subject'''
    query_forbid_1 = '''select forbid_subject from forbid_1 group by forbid_subject'''
    insert_sql = '''insert into forestry_subject (subject) value (%s)'''

    CURSOR.execute(query_forestry_subject)
    # A set gives constant-time membership tests instead of rescanning a list
    # for every candidate subject.
    known_subjects = {row[1] for row in CURSOR.fetchall()}

    CURSOR.execute(query_forbid_1)
    for row in CURSOR.fetchall():
        subject = row[0]
        if subject in known_subjects:
            continue
        known_subjects.add(subject)
        CURSOR.execute(insert_sql, (subject,))
        conn.commit()
        print(subject)
Ejemplo n.º 6
0
def class_one_sentences_extracte():
    """Extract sentences from first-class law texts into ``sentences``.

    A ``law_content_parse`` row belongs to the first class when its ``p_key``
    starts with '第…章' or '第…条'.  Content containing ':' is stored whole
    with ``is_single = 0``; other content is split on newlines and every
    non-empty line is stored with ``is_single = 1``.  ``title_id`` is the
    primary key of the ``law_content_parse`` row.  Each insert is committed
    individually; failures are printed and rolled back.
    """
    # Regular expression deciding whether a row is first-class text.
    heading = re.compile("第(.*?)(?:章|条)")
    cursor = conn.cursor()
    cursor.execute("select id, law_id, p_key, p_content from law_content_parse")
    rows = cursor.fetchall()

    insert_sentence = "insert into sentences (law_id, title_id, sentence, is_single) " \
                      "value (%s, %s, %s, %s)"

    def store(law_id, title_id, value, label, is_single):
        # One insert + commit; the failure banner is printed before rollback.
        try:
            cursor.execute(insert_sentence,
                           (law_id, title_id, value, is_single))
            conn.commit()
            print(label + '-----------Success')
        except Exception as e:
            print('\033[1;32;41m' + label + ': FAILED---------' + '\033[0m')
            conn.rollback()
            print(e)

    for row in rows:
        if not heading.match(row[2]):
            continue
        title_id, law_id, content = row[0], row[1], row[3]
        text = str(content)
        if ':' in text:
            store(law_id, title_id, content, text, 0)
            continue
        for line in text.split('\n'):
            if line:
                store(law_id, title_id, line, line, 1)
Ejemplo n.º 7
0
def forest_fire_prevention_parser(file_path):
    """Parse one law text file into ``law_content`` rows and a cleaned copy.

    The law is looked up in ``law`` by the file name without its extension.
    The file is read as GBK (undecodable bytes ignored).  Everything from the
    line starting with '【法规全文】' to the end of the file is scanned: a line
    that contains a space is split into a key (first piece) and the start of
    its content (second piece); following lines without a space are appended
    to that content.  Each key/content pair is appended to a file of the same
    name under ``dir_path`` and inserted into ``law_content`` with
    ``law_class`` '森林防火'.

    Args:
        file_path: backslash-separated path of the source text file.
    """
    # Output directory for the cleaned "key:content" files (hard-coded).
    dir_path = "C:\\Users\\dhz1216\\Desktop\\washing\\森林防火"
    file_name = file_path.split("\\")[-1]
    cursor = conn.cursor()
    select_sql = "select id, name from law where text_name = %s"
    # NOTE(review): ``(x)`` is a plain string, not a 1-tuple; whether the
    # driver accepts a bare scalar here depends on the DB library — confirm.
    cursor.execute(select_sql, (file_name.split('.')[0]))
    # NOTE(review): raises TypeError when no law row matches (fetchone()
    # returns None).
    law_id = cursor.fetchone()[0]
    with open(file_path, "r", encoding='gbk', errors='ignore') as f:
        line = f.readline()
        # Skip forward until the line that starts the full law text.
        while line:
            if line.startswith('【法规全文】'):
                # Append mode: re-running the parser adds to an existing file.
                # No encoding is given, so the platform default is used.
                with open(dir_path + "\\" + file_name, "a") as w:
                    line = line.replace('【法规全文】', '')
                    # line = f.readline()
                    # From here on this inner loop consumes the rest of the
                    # file; the outer loop ends once ``line`` is empty.
                    while line:
                        if len(line.lstrip().split(' ')) > 1:
                            # "key content": only the first two space-separated
                            # pieces are used; any further pieces are dropped.
                            key_title = line.lstrip().split(' ')[0]
                            value_content = line.lstrip().split(' ')[1]
                            line = f.readline()
                            # Lines without a space continue the current entry.
                            while line:
                                if len(line.lstrip().split(' ')) <= 1:
                                    value_content = value_content + line.lstrip().split(' ')[0]
                                    line = f.readline()
                                else:
                                    break
                            w.write(key_title + ':' + value_content + '\n')
                            insert_sql = "insert into law_content (law_id, p_key, p_content, law_class) " \
                                         "value (%s, %s, %s, %s)"
                            try:
                                cursor.execute(insert_sql, (law_id, key_title, value_content, '森林防火'))
                                conn.commit()
                                print('\033[1;37;40m' + file_name + ': PARSE SUCCESS' + '\033[0m')
                            except Exception as e:
                                # Report, roll back and carry on with the next entry.
                                print(e)
                                conn.rollback()
                                print('\033[1;32;41m' + file_name + ': PARSE FAILED---------' + '\033[0m')

                        else:
                            # Line without a space outside an entry: skip it.
                            line = f.readline()
            else:
                line = f.readline()
Ejemplo n.º 8
0
def location_extract():
    """Derive the region of every law from its title and store it in ``law.location``.

    Uses pyltp word segmentation and part-of-speech tagging (the externally
    defined ``segmentor`` and ``postagger``).  For each law:

    1. the first ``city.name`` not ending in '市' that occurs in the title is
       used, if any;
    2. otherwise the title is segmented and tagged, and the first word tagged
       'ns' together with up to two directly following 'ns' words forms the
       location;
    3. a result of at most one character falls back to '中华人民共和国'.

    Each update is committed on its own; failures are rolled back and printed.
    """
    select_sql = "select id, name from law"
    update_sql = "update law set location = %s where id = %s"
    select_special_city_sql = "select name from city where name not like '%市'"
    cursor = conn.cursor()
    cursor.execute(select_sql)
    results = cursor.fetchall()
    cursor.execute(select_special_city_sql)
    special_city_list = [row[0] for row in cursor.fetchall()]

    for result in results:
        title = result[1]
        location = next(
            (city for city in special_city_list if city in title), '')

        if not location:
            words = list(segmentor.segment(title))
            postag = list(postagger.postag(words))
            for index, tag in enumerate(postag):
                if tag != 'ns':
                    continue
                # Take the first place name plus at most two consecutive
                # followers.  The bounds check avoids the IndexError the
                # unchecked index + 1 / index + 2 look-ahead raised when the
                # place name sat at the end of the title.
                for position in range(index, min(index + 3, len(postag))):
                    if postag[position] != 'ns':
                        break
                    location = location + words[position]
                break

        if len(location) <= 1:
            location = '中华人民共和国'
        try:
            cursor.execute(update_sql, (location, result[0]))
            conn.commit()
            print(str(result[0]) + result[1] + '-----------SUCCESS')
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception object raised a TypeError.
            print('\033[1;32;41m' + str(result[0]) + result[1] + str(e) +
                  '\033[0m')
Ejemplo n.º 9
0
def law_to_class_process():
    """Fill ``law.class_id`` with the id of the matching ``law_class`` row.

    For every law the ``law_class`` row whose ``type`` equals the law's
    ``type`` is looked up and its id written to ``law.class_id``.  Laws whose
    type has no ``law_class`` row are reported and skipped.  Each update is
    committed on its own; failures are rolled back and printed.
    """
    cursor = conn.cursor()
    law_select = "select id, type, name from law"
    class_select_sql = "select id from law_class where type = %s"
    update_sql = "update law set class_id = %s where id = %s"
    cursor.execute(law_select)
    for law in cursor.fetchall():
        law_id, law_type, law_name = law[0], law[1], law[2]
        # Parameters are passed as a real 1-tuple (``(x)`` is just ``x``).
        cursor.execute(class_select_sql, (law_type,))
        class_info = cursor.fetchone()
        if class_info is None:
            # Unknown type: report it instead of crashing on ``None[0]``.
            print('\033[1;32;41m' + str(law_name) +
                  ': PARSE FAILED---------no law_class for type ' +
                  repr(law_type) + '\033[0m')
            continue
        class_id = class_info[0]
        try:
            cursor.execute(update_sql, (class_id, law_id))
            conn.commit()
            print(law_name + '-----------------SUCCESS')
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception object raised a TypeError.
            print('\033[1;32;41m' + law_name + ': PARSE FAILED---------' +
                  str(e) + '\033[0m')
def forbid_act_save(forbid_list_1, forbid_list_2, forbid_list_3):
    """Store extracted prohibition records in ``forbid_1`` .. ``forbid_3``.

    Args:
        forbid_list_1: records for ``forbid_1``; positions 0, 2, 3, 4, 5 hold
            law_id, chapter_id, sentence_id, forbid_subject, forbid_action.
        forbid_list_2: records for ``forbid_2`` with the same layout.
        forbid_list_3: records for ``forbid_3``; positions 0, 2, 3, 4 hold
            law_id, chapter_id, sentence_id, forbid_action.

    Every record is inserted through the module-level ``CURSOR``, committed
    and then printed.  Database errors propagate.
    """
    sql_with_subject_1 = '''insert into forbid_1 (law_id, chapter_id, sentence_id, forbid_subject, forbid_action) 
                      value (%s, %s, %s, %s, %s)'''
    sql_with_subject_2 = '''insert into forbid_2 (law_id, chapter_id, sentence_id, forbid_subject, forbid_action) 
                          value (%s, %s, %s, %s, %s)'''
    sql_action_only = '''insert into forbid_3 (law_id, chapter_id, sentence_id, forbid_action) value (%s, %s, %s, %s)'''

    # (statement, records, positions of the record fields to store)
    batches = (
        (sql_with_subject_1, forbid_list_1, (0, 2, 3, 4, 5)),
        (sql_with_subject_2, forbid_list_2, (0, 2, 3, 4, 5)),
        (sql_action_only, forbid_list_3, (0, 2, 3, 4)),
    )
    for statement, records, positions in batches:
        for record in records:
            values = tuple(record[position] for position in positions)
            CURSOR.execute(statement, values)
            conn.commit()
            print(record)
Ejemplo n.º 11
0
def update_article():
    """Append ':' to the content of all non-single article sentences.

    Processes ``article_1_sentence`` and then ``article_2_sentence``: every
    row with ``is_single = 0`` gets its content (fourth column of a
    ``select *`` row) plus ':' written back.  Each update is committed on its
    own; failures are rolled back and reported with the label of the table
    being processed ('-1-' or '-2-').
    """
    cursor = conn.cursor()
    targets = (('article_1_sentence', '-1-'), ('article_2_sentence', '-2-'))
    for position, (table, label) in enumerate(targets):
        if position:
            # Visual separator between the two tables' output.
            print(
                '\n',
                '=============================================================================================',
                '\n')
        select_sql = 'select * from ' + table + ' where is_single = 0'
        update_sql = 'update ' + table + ' set content = %s where id = %s'
        cursor.execute(select_sql)
        for sentence in cursor.fetchall():
            sentence_id = sentence[0]
            content = sentence[3] + ':'
            try:
                cursor.execute(update_sql, (content, sentence_id))
                conn.commit()
                print(str(sentence_id), label, content,
                      '------------------SUCCESS')
            except Exception as e:
                conn.rollback()
                # The label now names the table that actually failed; the
                # article_1 branch used to report '-2-'.
                print('\033[1;32;41m', str(sentence_id), label, e,
                      '-------FAILED', '\033[0m')
Ejemplo n.º 12
0
def save_relation(relation_list, law_id, content_class, chapter_id,
                  sentence_id):
    """Store extracted (subject, relation, object) triples in ``extract_relation``.

    Args:
        relation_list: iterable of triples indexed as
            ``[subject, relation, object]``.
        law_id, content_class, chapter_id, sentence_id: stored unchanged in
            the ``law_id``, ``class``, ``chapter_id`` and ``sentence_id``
            columns of every row.

    ``is_contain`` is set to 1 when the object equals the marker string
    '根据章节条款信息补全list' and to 0 otherwise.  Each insert is committed on
    its own; a failing insert is rolled back and reported, and processing
    continues with the next triple.
    """
    cursor = conn.cursor()
    insert_sql = '''insert into extract_relation 
                    (law_id, class, chapter_id, sentence_id, is_contain, subject, relation, object)
                    value (%s, %s, %s, %s, %s, %s, %s, %s)'''
    for relation in relation_list:
        subject = relation[0]
        relation_name = relation[1]
        object_name = relation[2]
        is_contain = 1 if object_name == '根据章节条款信息补全list' else 0
        try:
            cursor.execute(insert_sql,
                           (law_id, content_class, chapter_id, sentence_id,
                            is_contain, subject, relation_name, object_name))
            conn.commit()
            print(subject, relation_name, object_name,
                  '--------saved--------')
        except Exception as e:
            conn.rollback()
            # Both the triple and the exception must be converted explicitly;
            # concatenating them to a str raised a TypeError in the handler.
            print('\033[1;32;41m' + str(relation) + str(e) +
                  ': FAILED---------' + '\033[0m')
def merge_forbid_action():
    """Copy rows from ``forbid_2`` and ``forbid_3`` into ``forbid_action``.

    The grouped ``forbid_2`` rows are copied first, then all ``forbid_3``
    rows.  Every insert goes through the module-level ``CURSOR`` and is
    committed individually; database errors propagate.
    """
    # NOTE(review): for forbid_2 the fourth selected column is
    # ``forbid_subject`` and it is stored as ``forbid_action`` — confirm that
    # this is intended.
    source_queries = (
        '''select law_id, chapter_id, sentence_id, forbid_subject from forbid_2 
                        GROUP BY law_id, forbid_subject, chapter_id, sentence_id''',
        '''select law_id, chapter_id, sentence_id, forbid_action from forbid_3''',
    )
    insert_sql = '''insert into forbid_action (law_id, chapter_id, sentence_id, forbid_action) value (%s, %s, %s, %s)'''

    for query in source_queries:
        CURSOR.execute(query)
        for row in CURSOR.fetchall():
            CURSOR.execute(insert_sql, (row[0], row[1], row[2], row[3]))
            conn.commit()
Ejemplo n.º 14
0
def init_relation_collection(filter_colum, key, relation_type, num):
    """Sample labelled sentences into ``relation_classify``.

    Rows of ``semantic_role_label_result`` whose ``filter_colum`` column
    contains ``key`` and whose ``parse_sentence`` does not contain '所有权' are
    read, grouped by ``parse_sentence``.  Sentences containing a digit or
    starting with one of the characters in ``head_reg`` are ignored; of the
    remaining ones every ``num``-th is inserted with the given
    ``relation_type``.  The number of remaining sentences is printed last.

    Args:
        filter_colum: column name interpolated into the select statement.
        key: substring searched for with ``like``.
        relation_type: value stored in ``relation_classify.relation_type``.
        num: sampling step; every ``num``-th accepted sentence is stored.
    """
    cursor = conn.cursor()
    key_word = '%' + key + '%'
    select_srl_results = ("select * from semantic_role_label_result where %s" % filter_colum) \
        + " like %s and parse_sentence not like %s group by parse_sentence"

    insert_relation_classify = '''insert into relation_classify 
    (law_id, class, chapter_id, sentence_id, complete_sentence, parse_sentence, relation_type, is_complex)
    value (%s, %s, %s, %s, %s, %s, %s, %s)'''

    num_reg = '[0-9]+'
    head_reg = '^[一二三四五六七八(1234567890]'

    cursor.execute(select_srl_results, (key_word, '%所有权%'))
    accepted = 0
    for row in cursor.fetchall():
        parse_sentence = str(row[6]).strip()
        is_enumerated = bool(re.search(num_reg, parse_sentence)
                             or re.search(head_reg, parse_sentence))
        if is_enumerated:
            continue
        accepted += 1
        if accepted % num != 0:
            continue
        values = (row[1], row[2], row[3], row[4], row[5], parse_sentence,
                  relation_type, row[10])
        cursor.execute(insert_relation_classify, values)
        conn.commit()
        print(relation_type, ' insert success')
    print(accepted)
Ejemplo n.º 15
0
def article_1_sentence_extract():
    """Split the content of every ``article_1`` row into sentences and clauses.

    Content without ':' is stored as one ``article_1_sentence`` row with
    ``is_single = 1``.  Content with ':' is split: the part before the first
    ':' (spaces removed) becomes a sentence with ``is_single = 0`` and each
    non-empty line of the part between the first and second ':' becomes an
    ``article_1_clause`` row linked to that sentence.

    Every insert is committed on its own.  A failing insert is rolled back
    and reported; when the sentence insert of a ':' article fails, its
    clauses are skipped so they cannot be linked to an unrelated sentence.
    """
    select_sql = "select * from article_1"
    insert_article_1_sentence_sql = '''insert into article_1_sentence (article_1_id, is_single, content) value (%s, %s, %s)'''
    insert_article_1_clause_sql = '''insert into article_1_clause (article_1_id, article_1_sentence_id, clause_content) value (%s, %s, %s)'''
    select_article1_sentence_id = '''SELECT id from article_1_sentence where id = (SELECT max(id) FROM article_1_sentence);'''
    cursor = conn.cursor()
    cursor.execute(select_sql)
    for article in cursor.fetchall():
        article_1_id = article[0]
        article_1_content = article[2]

        if ':' not in article_1_content:
            try:
                cursor.execute(insert_article_1_sentence_sql,
                               (article_1_id, 1, article_1_content))
                conn.commit()
                print(article_1_content +
                      '=========================================SUCCESS')
            except Exception as e:
                conn.rollback()
                print('\033[1;32;41m' + str(article_1_id) + '--' + str(e) +
                      ': FAILED---------' + '\033[0m')
            continue

        # NOTE(review): only the text between the first and the second ':'
        # is turned into clauses; anything after a second ':' is dropped.
        parts = str(article_1_content).split(':')
        article_1_sentence_content = parts[0].replace(" ", "")
        try:
            cursor.execute(
                insert_article_1_sentence_sql,
                (article_1_id, 0, article_1_sentence_content))
            conn.commit()
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception object raised a TypeError.
            print('\033[1;32;41m' + str(article_1_id) +
                  article_1_sentence_content + str(e) + ': FAILED---------' +
                  '\033[0m')
            # Without the new sentence row, max(id) below would point at a
            # sentence of another article.
            continue

        # The sentence just inserted is assumed to carry the highest id.
        cursor.execute(select_article1_sentence_id)
        sentence_id = cursor.fetchone()[0]
        for article_1_clause in parts[1].split("\n"):
            if article_1_clause == '':
                continue
            try:
                cursor.execute(
                    insert_article_1_clause_sql,
                    (article_1_id, sentence_id,
                     str(article_1_clause).replace(" ", "")))
                conn.commit()
            except Exception as e:
                conn.rollback()
                print('\033[1;32;41m' + str(article_1_id) +
                      article_1_clause + str(e) + ': FAILED---------' +
                      '\033[0m')
        print(article[2] +
              '============================================SUCCESS')
Ejemplo n.º 16
0
def law_province_code_update():
    """Derive ``law.province_code`` from ``law.location_code``.

    The province code is the first two characters of the location code
    followed by '0000'; laws whose ``location_code`` is NULL get '000000'.
    Each update is committed on its own; a failing update is rolled back and
    reported, and processing continues with the next law.
    """
    select_sql = "select id, name, location, location_code from law"
    update_sql = "update law set province_code = %s where id = %s"
    cursor = conn.cursor()
    cursor.execute(select_sql)
    for law in cursor.fetchall():
        law_id, law_name, law_location_code = law[0], law[1], law[3]
        if law_location_code is not None:
            province_code = str(law_location_code)[0:2] + '0000'
        else:
            province_code = '000000'

        try:
            cursor.execute(update_sql, (province_code, law_id))
            conn.commit()
            print(law_name + '-------------------SUCCESS')
        except Exception as e:
            conn.rollback()
            # str(e): concatenating the exception object raised a TypeError.
            print('\033[1;32;41m' + law_name + ': FAILED---------' + str(e) +
                  '\033[0m')
def entity_wash():
    """Clean the subject and object text of every ``<class>_relation`` table.

    For each class in ``SINGLE_RELATION_CLASS`` all rows of the table
    ``<class>_relation`` are rewritten:

    * if the first run of Chinese characters / full-width punctuation in the
      subject starts at offset 1-3, the subject is replaced by that run
      (dropping the short leading prefix);
    * one trailing ',' or '。' is removed from subject and object.

    Empty subject or object strings are left as they are.  Every update is
    committed individually; database errors propagate.
    """
    cursor = conn.cursor()
    chinese_pattern = "[\\u4e00-\\u9fa5\u3002\uff1b\uff0c\uff1a\u201c\u201d\uff08\uff09\u3001\uff1f\u300a\u300b]+"
    trailing_marks = (',', '。')
    for class_type in SINGLE_RELATION_CLASS:
        table_name = class_type + '_relation'
        select_sql = 'select * from %s' % table_name
        update_sql = 'update %s ' % table_name + 'set subject = %s, object = %s where id = %s'
        cursor.execute(select_sql)
        for relation in cursor.fetchall():
            relation_id = relation[0]
            subject = relation[4]
            object_text = relation[6]
            cn_pattern_res = re.search(chinese_pattern, subject, re.M | re.I)
            if cn_pattern_res and 0 < cn_pattern_res.start() < 4:
                subject = cn_pattern_res.group(0)
            # endswith() is safe on empty strings, unlike indexing with [-1].
            if subject.endswith(trailing_marks):
                subject = subject[:-1]
            if object_text.endswith(trailing_marks):
                object_text = object_text[:-1]
            cursor.execute(update_sql, (subject, object_text, relation_id))
            conn.commit()
            print(class_type, relation_id, subject, object_text)
Ejemplo n.º 18
0
def subject_type_classify():
    """Classify every subject in ``new_forestry_subject_final`` by its suffix.

    A subject ending in one of the organisation words becomes 'ORG'; otherwise
    one ending in a park/area word becomes 'PARK'; everything else becomes
    'FORESTRY'.  The result is written to ``subject_type`` and committed row
    by row.  Database errors propagate.
    """
    park_suffixes = (
        '公园', '保护区', '区域', '名胜区', '风景区', '景区', '地区', '区', '湿地', '范围', '基地',
        '山', '湖', '景观',
    )
    org_suffixes = (
        '政府', '机构', '部门', '企业', '局', '厅', '单位', '指挥部', '机关', '管委会', '委员会',
        '公司', '站', '部', '委', '办',
    )
    cursor = conn.cursor()
    cursor.execute('''select * from new_forestry_subject_final''')
    update_sql = '''update new_forestry_subject_final set subject_type = %s where id = %s'''
    for row in cursor.fetchall():
        row_id, subject = row[0], row[1]
        text = str(subject)
        # Organisation suffixes take precedence over park suffixes.
        if text.endswith(org_suffixes):
            subject_type = 'ORG'
        elif text.endswith(park_suffixes):
            subject_type = 'PARK'
        else:
            subject_type = 'FORESTRY'

        cursor.execute(update_sql, (subject_type, row_id))
        conn.commit()
        print(subject, 'update success!')
Ejemplo n.º 19
0
def complex_main_sentence_analysis():
    """Analyse the main clause of every non-single sentence and store the result.

    For each ``sentences`` row with ``is_single = 0`` the text before the
    first ':' (plus ':') is segmented, POS-tagged, dependency-parsed and
    labelled with semantic roles using the externally defined ``segmentor``,
    ``postagger``, ``parser`` and ``labeller``.  Results are written to:

    * ``role_label`` - one row per semantic-role argument;
    * ``verb``       - one row per word tagged 'v', flagged as core when a
      semantic role is attached to it; the 'HED' word is marked ``is_head``;
    * ``words``      - one row per dependency arc that touches a core verb.

    Every statement is committed on its own; a failing statement is rolled
    back and reported, and processing continues.  The total running time is
    printed at the end.
    """
    start_time = time.time()

    select_sql = '''select * from sentences where is_single = 0'''
    insert_role_label_sql = '''insert into role_label (sentence_id, arg_name, arg_start, arg_end, core_verb_index) value (%s, %s, %s, %s, %s)'''
    insert_verb_sql = '''insert into verb (sentence_id, part_of_speech, loc_index, is_core) value (%s, %s, %s, %s)'''
    update_verb_sql = '''update verb set is_head = 1 where sentence_id = %s and loc_index = %s'''
    insert_words_sql = (
        'insert into words (sentence_id, part_of_speech, core_verb_index, '
        'relation, word, head_or_tail) value (%s, %s, %s, %s, %s, %s)')

    cursor = conn.cursor()
    cursor.execute(select_sql)
    complex_sentences = cursor.fetchall()
    for sentence in complex_sentences:
        sentence_id = sentence[0]
        main_sentence = str(sentence[3]).strip().split(':')[0] + ':'
        origin_words = list(segmentor.segment(main_sentence))  # word segmentation
        origin_postags = list(postagger.postag(origin_words))  # POS tagging
        arcs = parser.parse(origin_words, origin_postags)  # dependency parsing
        roles = labeller.label(origin_words, origin_postags, arcs)  # semantic role labelling

        # Store the semantic-role arguments and collect the core verb indexes.
        print('语义角色标注--------', str(len(roles)))
        core_verb_list = list()
        for role in roles:
            core_verb_list.append(role.index)
            for arg in role.arguments:
                arg_name = arg.name
                try:
                    cursor.execute(insert_role_label_sql,
                                   (sentence_id, arg_name, arg.range.start,
                                    arg.range.end, role.index))
                    conn.commit()
                    print(str(sentence_id), main_sentence, '-----------',
                          origin_words[role.index], arg_name,
                          '-------------SUCCESS')
                except Exception as e:
                    conn.rollback()
                    # str(e): concatenating the exception object raised a
                    # TypeError inside the handler.
                    print('\033[1;32;41m' + str(sentence_id) + main_sentence +
                          str(e) + ': ---------FAILED---------' + '\033[0m')

        # Store every verb and whether it is a core verb.
        print('提取动词信息-------------------------------------------')
        for index, postag in enumerate(origin_postags):
            if postag != 'v':
                continue
            is_core = 1 if index in core_verb_list else 0
            try:
                cursor.execute(insert_verb_sql,
                               (sentence_id, 'v', index, is_core))
                conn.commit()
                print(str(index), '--', origin_words[index],
                      '-----------------SUCCESS')
            except Exception as e:
                conn.rollback()
                print('\033[1;32;41m' + str(index) + origin_words[index] +
                      str(e) + ': ---------FAILED---------' + '\033[0m')

        # Store the relation between core verbs and the words attached to
        # them.  Arc heads are 1-based (0 is ROOT), hence the shifted lists.
        arc_head = [a.head for a in arcs]
        arc_relation = [a.relation for a in arcs]
        tree_node_list = ['ROOT'] + origin_words
        postags = ['NONE'] + origin_postags
        for i in range(len(arc_head)):
            j = arc_head[i]
            head_index = j - 1
            tail_index = i
            relation = arc_relation[i]

            if relation == 'HED':
                # The word attached to ROOT is the head verb of the sentence.
                print('更新根动词情况:')
                try:
                    cursor.execute(update_verb_sql, (sentence_id, i))
                    conn.commit()
                    print('根动词-----index: ', str(i), '----', origin_words[i],
                          '-----UPDATE SUCCESS')
                except Exception as e:
                    conn.rollback()
                    print('\033[1;32;41m' + '根动词---' + str(i) +
                          origin_words[i] + str(e) +
                          ': ---------FAILED---------' + '\033[0m')
                continue

            if head_index not in core_verb_list and tail_index not in core_verb_list:
                continue
            elif head_index in core_verb_list:
                # The core verb is the head; record the dependent word.
                part_of_speech = postags[i + 1]
                core_verb_index = head_index
                word = origin_words[i]
                loc = 'tail'
            else:
                # The core verb is the dependent; record its head word.
                part_of_speech = postags[j]
                core_verb_index = tail_index
                word = tree_node_list[j]
                loc = 'head'
            try:
                cursor.execute(insert_words_sql,
                               (sentence_id, part_of_speech, core_verb_index,
                                relation, word, loc))
                conn.commit()
                print(tree_node_list[j], postags[j], '----', origin_words[i],
                      postags[i + 1], relation, '-----SUCCESS')
            except Exception as e:
                conn.rollback()
                print('\033[1;32;41m', tree_node_list[j], postags[j], '----',
                      origin_words[i], postags[i + 1], relation, '-----FAILED',
                      e, '\033[0m')

        print(
            '\n',
            '===============================================================',
            '\n')

    end_time = time.time()
    print('处理', str(len(complex_sentences)), '条数据的总耗时为:',
          str(end_time - start_time), 's')
Ejemplo n.º 20
0
def chapter_article_process() -> None:
    """Split ``law_content_parse`` rows into chapter / article tables (method 1).

    The original author flagged this first approach as problematic: it walks
    every row of ``law_content_parse`` in a single pass, without separating
    the rows by law.

    For each row, column 2 (the key) is tested against the chapter pattern:

    * On a match the row is inserted into ``chapter``; the rows that follow
      are inserted into ``article_1`` (linked to the newest chapter id) for as
      long as their key matches the article pattern.
    * Otherwise the row is inserted into ``article_2`` with the law id taken
      from column 1.

    Every insert is committed on its own. On the first failing insert the
    transaction is rolled back, the error is printed and the function returns
    early, leaving the remaining rows unprocessed.
    """
    # Patterns and SQL are loop-invariant, so they are built once up front.
    # NOTE(review): ``match`` only anchors at the start and ``.*?`` may span
    # any text, so a key such as "第一条…章" would count as a chapter — confirm
    # that column 2 only ever holds the bare heading.
    pattern_chapter = re.compile("第(.*?)章")
    pattern_article = re.compile("第(.*?)条")
    insert_chapter_sql = "insert into chapter (chapter_key, chapter_name, law_id) value (%s, %s, %s)"
    insert_article1_sql = "insert into article_1 (a_key, a_content, chapter_id) value (%s, %s, %s)"
    insert_article2_sql = "insert into article_2 (a_key, a_content, law_id) value (%s, %s, %s)"
    select_chapter_sql = 'SELECT id from chapter where id = (SELECT max(id) FROM chapter);'

    cursor = conn.cursor()
    cursor.execute("select * from law_content_parse")
    contents = cursor.fetchall()

    index = 0
    while index < len(contents):
        row = contents[index]
        if pattern_chapter.match(row[2]):
            # Chapter heading: store it in ``chapter``.
            try:
                cursor.execute(insert_chapter_sql, (row[2], row[3], row[1]))
                conn.commit()
                print(row[5] + '----' + row[2] + '----------------SUCCESS')
            except Exception as e:
                conn.rollback()
                # str(e) is required: concatenating the exception object
                # itself raises TypeError inside this handler.
                print('\033[1;32;41m' + row[5] + str(e) +
                      ': CHAPTER FAILED---------' + '\033[0m')
                return

            index = index + 1
            if index < len(contents):
                # The chapter just inserted has the highest id; look it up
                # once instead of once per following article row.
                # TODO: detect here whether the next law has already begun.
                cursor.execute(select_chapter_sql)
                chapter_id = cursor.fetchone()[0]
            while index < len(contents):
                row = contents[index]
                if not pattern_article.match(row[2]):
                    # Not an article: hand the row back to the outer loop.
                    print('-----------------------------' + row[5] +
                          '----------------------------------')
                    break
                # Article that belongs to the current chapter.
                try:
                    cursor.execute(insert_article1_sql,
                                   (row[2], row[3], chapter_id))
                    conn.commit()
                    print(row[5] + '----' + row[2] +
                          '----------------SUCCESS')
                except Exception as e:
                    conn.rollback()
                    print('\033[1;32;41m' + row[5] + str(e) +
                          ': ARTICLE FAILED---------' + '\033[0m')
                    return
                index = index + 1
        else:
            # Row outside any chapter: store it in ``article_2``.
            try:
                cursor.execute(insert_article2_sql, (row[2], row[3], row[1]))
                conn.commit()
                print(row[5] + '----' + row[2] + '----------------SUCCESS')
            except Exception as e:
                conn.rollback()
                print('\033[1;32;41m' + row[5] + str(e) +
                      ': ARTICLE FAILED---------' + '\033[0m')
                return
            index = index + 1
Ejemplo n.º 21
0
def chapter_article_process_2() -> None:
    """Split ``law_content_parse`` rows into chapter / article tables (method 2).

    Unlike the single-pass variant, this version first collects the distinct
    ``law_id`` values and then processes the rows of one law at a time.

    Within a law, column 2 (the key) of each row is tested against the
    chapter pattern:

    * On a match the row is inserted into ``chapter``; the rows that follow
      are inserted into ``article_1`` (linked to the newest chapter id) for as
      long as their key matches the article pattern.
    * Otherwise the row is inserted into ``article_2`` with the current
      ``law_id``.

    Every insert is committed on its own. On the first failing insert the
    transaction is rolled back, the error is printed and the whole function
    returns, so later laws are not processed either.
    """
    pattern_chapter = re.compile("第(.*?)章")
    pattern_article = re.compile("第(.*?)条")
    select_law_id_sql = "select law_id from law_content_parse group by law_id"
    select_law_content_sql = "select * from law_content_parse where law_id = %s"
    insert_chapter_sql = "insert into chapter (chapter_key, chapter_name, law_id) value (%s, %s, %s)"
    insert_article1_sql = "insert into article_1 (a_key, a_content, chapter_id) value (%s, %s, %s)"
    insert_article2_sql = "insert into article_2 (a_key, a_content, law_id) value (%s, %s, %s)"
    select_chapter_sql = 'SELECT id from chapter where id = (SELECT max(id) FROM chapter);'

    cursor = conn.cursor()

    # Collect the distinct law ids first.
    cursor.execute(select_law_id_sql)
    law_id_list = [law[0] for law in cursor.fetchall()]

    # Then fetch and process the rows of each law separately.
    for law_id in law_id_list:
        cursor.execute(select_law_content_sql, (law_id, ))
        contents = cursor.fetchall()

        index = 0
        while index < len(contents):
            row = contents[index]
            if pattern_chapter.match(row[2]):
                # Chapter heading: store it in ``chapter``.
                try:
                    cursor.execute(insert_chapter_sql,
                                   (row[2], row[3], law_id))
                    conn.commit()
                    print(row[5] + '----' + row[2] +
                          '----------------SUCCESS')
                except Exception as e:
                    conn.rollback()
                    # str(e) is required: concatenating the exception object
                    # itself raises TypeError inside this handler.
                    print('\033[1;32;41m' + row[5] + str(e) +
                          ': CHAPTER FAILED---------' + '\033[0m')
                    return

                index = index + 1
                if index < len(contents):
                    # The chapter just inserted has the highest id; look it
                    # up once instead of once per following article row.
                    cursor.execute(select_chapter_sql)
                    chapter_id = cursor.fetchone()[0]
                while index < len(contents):
                    row = contents[index]
                    if not pattern_article.match(row[2]):
                        # Not an article: hand the row back to the outer loop.
                        print('-----------------------------' + row[5] +
                              '----------------------------------')
                        break
                    # Article that belongs to the current chapter.
                    try:
                        cursor.execute(insert_article1_sql,
                                       (row[2], row[3], chapter_id))
                        conn.commit()
                        print(row[5] + '----' + row[2] +
                              '----------------SUCCESS')
                    except Exception as e:
                        conn.rollback()
                        print('\033[1;32;41m' + row[5] + str(e) +
                              ': ARTICLE FAILED---------' + '\033[0m')
                        return
                    index = index + 1
            else:
                # Row outside any chapter: store it in ``article_2``.
                try:
                    cursor.execute(insert_article2_sql,
                                   (row[2], row[3], law_id))
                    conn.commit()
                    print(row[5] + '----' + row[2] +
                          '----------------SUCCESS')
                except Exception as e:
                    conn.rollback()
                    print('\033[1;32;41m' + row[5] + str(e) +
                          ': ARTICLE FAILED---------' + '\033[0m')
                    return
                index = index + 1