Example #1
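All of the examples in this collection reference a module-level database connection conn (plus re, os, pyltp helpers and a gensim model) that the excerpts never define. A minimal sketch of the assumed setup, using pymysql with placeholder credentials (driver, host and database name are assumptions, not taken from the source):

import os
import re

import pymysql

# Assumed shared connection used by every example below; credentials are placeholders.
conn = pymysql.connect(host='localhost', user='root', password='******',
                       database='forestry_law', charset='utf8mb4')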
def insert_new_forestry_subject(subject_set):
    insert_sql = '''insert into new_forestry_subject (subject) value (%s)'''
    cursor = conn.cursor()
    for subject in subject_set:
        cursor.execute(insert_sql, (subject, ))
        conn.commit()
        print(subject, 'insert success!')
def get_correlation_sentences():
    cursor = conn.cursor()
    select_article_1_sql = '''select * from article_1'''
    select_article_2_sql = '''select * from article_2'''
    cursor.execute(select_article_1_sql)
    results_1 = cursor.fetchall()
    pattern_1 = "^本(办法|条例|规定|法|细则)(.*?)由(.*?)"
    chapter_to_law_dict = get_chapter_law_mapping()
    count = 0
    sentences_list = list()
    for result in results_1:
        law_id = chapter_to_law_dict[result[3]]
        contents = list(filter(None, str(result[2]).strip().split('\n')))
        for content in contents:
            if re.findall(pattern_1, content.strip()):
                count = count + 1
                if len(content.strip()) > 150:
                    continue
                sentences_list.append(tuple((law_id, content.strip())))

    cursor.execute(select_article_2_sql)
    results_2 = cursor.fetchall()
    for result in results_2:
        law_id = result[3]
        contents = list(filter(None, str(result[2]).strip().split('\n')))
        for content in contents:
            if re.findall(pattern_1, content.strip()):
                count = count + 1
                if len(content.strip()) > 150:
                    continue
                sentences_list.append(tuple((law_id, content.strip())))
    print(count)
    return sentences_list
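pattern_1 above is used only as a presence test: its trailing lazy group matches the empty string, so any clause starting with 本办法/本条例/本规定/本法/本细则 and containing 由 after the keyword triggers it. A quick illustrative check (the sample sentence is made up):

import re

pattern_1 = "^本(办法|条例|规定|法|细则)(.*?)由(.*?)"
print(re.findall(pattern_1, "本办法由国务院林业主管部门负责解释。"))
# [('办法', '', '')]  -- non-empty, so the sentence would be collected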
Example #3
def get_relation_collection():
    cursor = conn.cursor()
    query_for_relation_classify = '''select * from relation_classify'''
    cursor.execute(query_for_relation_classify)
    relation_classify_results = cursor.fetchall()
    relation_classify_dict = {
        'define': [],
        'aim': [],
        'application_scope': [],
        'contain': [],
        'duty': [],
        'right': [],
        'accord': [],
        'forbid': [],
        'punishment': []
    }
    for res in relation_classify_results:
        law_id = res[1]
        article_class = res[2]
        chapter_id = res[3]
        sentence_id = res[4]
        parse_sentence = res[6]
        relation_type = res[7]
        relation_classify_dict[relation_type].append(
            tuple((law_id, article_class, chapter_id, sentence_id,
                   parse_sentence, relation_type)))
    return relation_classify_dict
Example #4
def get_relation_by_type(relation_type):
    cursor = conn.cursor()
    select_sql = '''select * from new_relation where relation_type = %s'''
    cursor.execute(select_sql, (relation_type, ))
    relation_results = cursor.fetchall()

    return relation_results
Example #5
def relation_collection_expand(filter_column, key, relation_type):
    cursor = conn.cursor()
    key_word = '%' + key + '%'
    select_srl_results = "select * from semantic_role_label_result where %s" % filter_colum \
                         + " like %s and parse_sentence not like %s group by parse_sentence"

    insert_relation_classify = '''insert into relation_classify 
    (law_id, class, chapter_id, sentence_id, complete_sentence, parse_sentence, relation_type, is_complex)
    value (%s, %s, %s, %s, %s, %s, %s, %s)'''

    num_reg = '[0-9]+'
    head_reg = '^[一二三四五六七八(1234567890]'
    count = 0
    cursor.execute(select_srl_results, (key_word, '%所有权%'))
    define_results = cursor.fetchall()
    for res in define_results:
        parse_sentence = str(res[6]).strip()
        count = count + 1

        law_id = res[1]
        article_class = res[2]
        chapter_id = res[3]
        sentence_id = res[4]
        complete_sentence = res[5]
        is_complex = res[10]
        cursor.execute(
            insert_relation_classify,
            (law_id, article_class, chapter_id, sentence_id, complete_sentence,
             parse_sentence, relation_type, is_complex))
        conn.commit()
        print(relation_type, ' insert success')
        # print(parse_sentence)
    print(relation_type, count)
    return count
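select_srl_results mixes Python %-formatting for the column name (an identifier cannot be a bound parameter) with driver placeholders for the values. For a hypothetical column name, the statement handed to the driver looks like this:

filter_column = 'complete_sentence'   # hypothetical column name, for illustration only
select_srl_results = "select * from semantic_role_label_result where %s" % filter_column \
                     + " like %s and parse_sentence not like %s group by parse_sentence"
print(select_srl_results)
# select * from semantic_role_label_result where complete_sentence like %s and parse_sentence not like %s group by parse_sentence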
Example #6
def search():
    content = request.args.get('content', '')
    search_words = list(segmentor.segment(content))
    cursor = conn.cursor()
    select_sql = '''select name, type from law'''
    cursor.execute(select_sql)
    law_info_dict = {}
    law_results = cursor.fetchall()
    for law in law_results:
        law_name = law[0]
        law_name_words = list(segmentor.segment(law_name))
        match_count = 0
        for w in law_name_words:
            if w in search_words:
                match_count = match_count + 1
        if (len(search_words) <= 3 and match_count >= 1) or match_count >= 3:
            law_info_dict.update({law[0]: match_count})

    if len(law_info_dict) > 0:
        match_res = sorted(law_info_dict.items(),
                           key=lambda x: x[1],
                           reverse=True)
        result = {'status': 200, 'data': match_res}
    else:
        result = {'status': 0, 'message': '未找到相关法律法规!'}
    return result
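search() reads request.args and returns a plain dict, which looks like a Flask view function; a minimal sketch of how it might be wired up, assuming Flask (the route path, app object and run call are mine, not from the source):

from flask import Flask

app = Flask(__name__)
# Register the search() view from the example above; '/search' is a hypothetical path.
# Flask >= 1.1 serializes the returned dict to JSON automatically.
app.add_url_rule('/search', view_func=search)

if __name__ == '__main__':
    app.run()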
Example #7
def query_and_match(query_sql, article_type):
    cursor = conn.cursor()
    cursor.execute(query_sql)
    results_1 = cursor.fetchall()
    results_detail = []
    results_accord = []
    accord_count = 0
    detail_count = 0
    output_file = "C:\\Users\\dhz\\Desktop\\template\\punishment_content.txt"
    with open(output_file, "a") as w:
        for res in results_1:
            content = str(res[3]).strip()
            pattern_res_detail = re.search(PUNISHMENT_PATTERN_DETAIL, content,
                                           re.M | re.I)
            pattern_res_accord = re.search(PUNISHMENT_AND_ACCORD_PATTERN,
                                           content, re.M | re.I)
            if pattern_res_accord:
                accord_count = accord_count + 1
                results_accord.append(res)
            elif pattern_res_detail:
                # w.write(content.replace("\n", "") + '\n')
                detail_count = detail_count + 1
                results_detail.append(res)
    print(detail_count)
    print(accord_count)
    return results_detail, results_accord, article_type
Example #8
def detail_content_parser():
    cursor = conn.cursor()
    select_sql = "select law_id, p_key, p_content from law_content_parse where id < 101"
    cursor.execute(select_sql)
    results = cursor.fetchall()
    for res in results:
        print(res[1] + ': ' + str(res[2]).split('\n')[0])
Example #9
def complex_extraction():
    query_for_parse_sentence = '''select complete_sentence, parse_sentence, class, sentence_id 
                                  from dependency_parsing_result 
                                  where is_complex = 1 and sentence_id > 9481 group by parse_sentence order by id'''
    cursor = conn.cursor()
    cursor.execute(query_for_parse_sentence)
    parse_sentences = cursor.fetchall()
    for parse_sentence in parse_sentences:
        complete_sentence = parse_sentence[0]
        parsing_sentence = parse_sentence[1]
        cursor.execute(
            SELECT_DP_SQL,
            (parse_sentence[1], parse_sentence[2], parse_sentence[3]))
        dp_results = cursor.fetchall()
        cursor.execute(
            SELECT_SDP_SQL,
            (parse_sentence[1], parse_sentence[2], parse_sentence[3]))
        sdp_results = cursor.fetchall()
        cursor.execute(
            SELECT_SRL_SQL,
            (parse_sentence[1], parse_sentence[2], parse_sentence[3]))
        srl_results = decode_srl_results(cursor.fetchall())
        # TODO: call the complex-sentence relation extraction core
        complex_extraction_core(dp_results, sdp_results, srl_results,
                                complete_sentence, parsing_sentence)
Example #10
def parsing_and_semantic_analysis(relation_type, relation_collect):
    cursor = conn.cursor()

    output_path = "G:\\analysis\\" + relation_type + ".txt"
    with open(output_path, "a") as w:
        for res in relation_collect:
            law_id = res[1]
            article_class = res[2]
            chapter_id = res[3]
            sentence_id = res[4]
            parse_sentence = res[6]
            relation_type = res[7]
            w.write(parse_sentence + '\n')

            cursor.execute(SELECT_SRL_SQL, (law_id, article_class, chapter_id,
                                            sentence_id, parse_sentence))
            srl_results = cursor.fetchall()
            srl_dict = srl_for_verb(srl_results)
            for verb in srl_dict:
                w.write(verb + ':')
                for role_label in srl_dict[verb]:
                    w.write(role_label + '\t')
                w.write('\n')
            w.write(
                '\n================================================================================\n'
            )
            cursor.execute(SELECT_DP_SQL, (law_id, article_class, chapter_id,
                                           sentence_id, parse_sentence))
            dp_results = cursor.fetchall()
            for dp in dp_results:
                w.write(dp[7] + '  --------  ' + dp[8] + '  -------  ' +
                        dp[9] + '\n')
            w.write(
                '\n********************************************************************************\n'
            )
Example #11
def city_index(word):  # map a city name to its standard city code
    cursor = conn.cursor()
    city_select_sql = 'select * from city'  # query the standard city table
    cursor.execute(city_select_sql)
    cities = cursor.fetchall()
    city_dict = dict()
    for city in cities:
        city_key = city[2].replace(' ', '').replace('市', '')  # strip '市' and spaces for easier comparison
        city_dict.update({city_key: city[1]})
    word = word.replace(' ', '').replace('市', '')

    select_special_city_sql = "select name from city where name not like '%市'"
    cursor.execute(select_special_city_sql)
    select_special_city = cursor.fetchall()
    special_city_list = list()
    for c in select_special_city:
        special_city_list.append(c[0])

    if word in city_dict:  # check whether there is a matching city
        code = city_dict[word]
        if word not in special_city_list:
            word = word + '市'
        return {word: code}
    else:
        return None
Example #12
def province_index(word):  # map a province name to its standard province code
    cursor = conn.cursor()
    province_select_sql = 'select * from province'  # query the standard province table
    cursor.execute(province_select_sql)
    provinces = cursor.fetchall()
    province_dict = dict()
    for province in provinces:
        province_key = province[2].replace(' ', '').replace('省', '').replace('市', '')  # strip '省', '市' and spaces for easier comparison
        province_dict.update({province_key: province[1]})
    word = word.replace(' ', '').replace('省', '').replace('市', '')

    special_province = ['北京', '上海', '重庆', '天津']
    special_area = ['澳门', '香港']
    if word in province_dict:  # check whether there is a matching province
        code = province_dict[word]
        if word in special_province:
            word = word + '市'
        elif word in special_area:
            word = word + '特别行政区'
        else:
            word = word + '省'
        return {word: code}
    else:
        return None
def subject_save(subject_set):
    cursor = conn.cursor()
    insert_sql = '''insert into forestry_subject (subject) value (%s)'''
    for subject in subject_set:
        cursor.execute(insert_sql, (subject, ))
        conn.commit()
        print(subject)
    print(len(subject_set))
Example #14
def get_article_2_map_dict():
    article_to_law_dict = dict()
    cursor = conn.cursor()
    query_for_article_2_sql = '''select id, a_key, law_id from article_2'''
    cursor.execute(query_for_article_2_sql)
    results = cursor.fetchall()
    for res in results:
        article_to_law_dict.update({res[0]: tuple((res[1], res[2]))})
    return article_to_law_dict
Example #15
def get_article_2_map_dict():
    select_article_2_sql = '''select id, law_id from article_2'''
    cursor = conn.cursor()
    cursor.execute(select_article_2_sql)
    article_law_dict = dict()
    results = cursor.fetchall()
    for res in results:
        article_law_dict.update({res[0]: res[1]})
    return article_law_dict
Example #16
def article_1_sentence_extract():  # split article_1 contents into sentences and clauses
    select_sql = "select * from article_1"
    cursor = conn.cursor()
    cursor.execute(select_sql)
    articles = cursor.fetchall()
    for article in articles:
        article_1_id = article[0]
        article_1_content = article[2]
        insert_article_1_sentence_sql = '''insert into article_1_sentence (article_1_id, is_single, content) value (%s, %s, %s)'''
        if ':' in article_1_content:
            is_single = 0
            article_1_sentence_content = str(article_1_content).split(
                ':')[0].replace(" ", "")
            try:
                cursor.execute(
                    insert_article_1_sentence_sql,
                    (article_1_id, is_single, article_1_sentence_content))
                conn.commit()
            except Exception as e:
                conn.rollback()
                print('\033[1;32;41m' + str(article_1_id) +
                      article_1_sentence_content + str(e) + ': FAILED---------' +
                      '\033[0m')

            article_1_clauses = str(article_1_content).split(':')[1].split(
                "\n")
            select_article1_sentence_id = '''SELECT id from article_1_sentence where id = (SELECT max(id) FROM article_1_sentence);'''
            cursor.execute(select_article1_sentence_id)
            sentence_id = cursor.fetchone()[0]
            for article_1_clause in article_1_clauses:
                if article_1_clause is not None and article_1_clause != '':
                    insert_article_1_clause_sql = '''insert into article_1_clause (article_1_id, article_1_sentence_id, clause_content) value (%s, %s, %s)'''
                    try:
                        cursor.execute(
                            insert_article_1_clause_sql,
                            (article_1_id, sentence_id,
                             str(article_1_clause).replace(" ", "")))
                        conn.commit()
                    except Exception as e:
                        conn.rollback()
                        print('\033[1;32;41m' + str(article_1_id) +
                              article_1_clause + str(e) + ': FAILED---------' +
                              '\033[0m')
            print(article[2] +
                  '============================================SUCCESS')
        else:
            is_single = 1
            try:
                cursor.execute(insert_article_1_sentence_sql,
                               (article_1_id, is_single, article_1_content))
                conn.commit()
                print(article_1_content +
                      '=========================================SUCCESS')
            except Exception as e:
                conn.rollback()
                print('\033[1;32;41m' + str(article_1_id) + '--' + str(e) +
                      ': FAILED---------' + '\033[0m')
def test_pyltp_sentence_split():
    cursor = conn.cursor()
    select_sql = "select p_content from law_content_parse"
    cursor.execute(select_sql)
    results = cursor.fetchall()
    count = 0
    for res in results:
        if ':' in res[0] and count == 0:
            sens = SentenceSplitter.split(res[0])
            print('\n'.join(sens))
            count = count + 1
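SentenceSplitter above is pyltp's rule-based sentence splitter and needs no model files; a standalone check of the same call on a made-up two-sentence string:

from pyltp import SentenceSplitter

text = '第一条 为了保护森林资源。第二条 本条例适用于林地管理。'
print('\n'.join(SentenceSplitter.split(text)))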
Example #18
def wash_law(table_name):
    cursor = conn.cursor()
    select_sql = "select id, subject from %s" % table_name + " where subject like %s"
    cursor.execute(select_sql, ('%' + 'law' + '%', ))
    results = cursor.fetchall()
    update_sql = "update %s " % table_name + "set subject = %s where id = %s"
    for res in results:
        subject = res[1][4:]
        cursor.execute(update_sql, (subject, res[0]))
        conn.commit()
        print(res[0], 'success')
Example #19
def get_law_aim():
    cursor = conn.cursor()
    select_sql = '''select * from law_aim group by law_id'''
    cursor.execute(select_sql)
    aim_results = cursor.fetchall()
    aim_dict = {}
    for aim in aim_results:
        law_id = aim[1]
        aim_content = aim[2]
        aim_dict.update({law_id: aim_content})
    return aim_dict
Example #20
def create_test_txt():
    cursor = conn.cursor()
    select_sql = '''select * from article_1_sentence where is_single = 0 limit 0, 500'''
    cursor.execute(select_sql)
    sentences = cursor.fetchall()
    with open(r"C:\Users\dhz1216\Desktop\test\law_input.txt", "a") as w:
        for sentence in sentences:
            contents = SentenceSplitter.split(sentence[3])
            for content in contents:
                if content is not None and content != '' and ':' in content:
                    w.write(str(content).strip() + '\n')
Example #21
def update_law(location, location_code, location_level, law_id):
    cursor = conn.cursor()
    update_sql = "update law set location = %s, location_code = %s, location_level = %s where id = %s"
    try:
        cursor.execute(update_sql,
                       (location, location_code, location_level, law_id))
        conn.commit()
        print(str(law_id) + '-----------------------UPDATE SUCCESS')
    except Exception as e:
        conn.rollback()
        print('\033[1;32;41m' + str(law_id) + ': FAILED---------' + str(e) +
              '\033[0m')
Example #22
def relation_wash(define_relation_list, relation_type):
    law_reg = "本(.*)所称(.*)"
    duty_reg = "(.*)的主要职责"
    head_reg = "^[一二三四五六七八九]"
    special_reg1 = r"[\s+\.\!\/_,$%^*(+\"\')]+|[+——()?【】“”《》〔〕:;!,。?、~@#¥%……&*()]+"
    special_reg2 = "[1234567890.]+"
    num_reg = "^[((]"
    update_sql = '''update new_relation set subject = %s where id = %s'''
    update_sql2 = '''update new_relation set subject = %s, relation_type = %s where id = %s'''
    cursor = conn.cursor()

    subject_list = []
    law_aim_dict = get_law_aim()
    for relation in define_relation_list:
        id = relation[0]
        law_id = relation[1]
        chapter_id = relation[2]
        sentence_id = relation[3]
        parse_sentence = relation[4]
        subject = relation[5]
        object = relation[7]

        subject = re.sub(special_reg2, '', subject)
        object = re.sub(special_reg2, '', object)
        if re.search(num_reg, subject):
            subject = subject[3:]
        if subject.startswith('、'):
            subject = subject[1:]
        if subject.endswith(',') or subject.endswith('、') or subject.endswith(',') or\
                subject.endswith('。') or subject.endswith(';') or subject.endswith(':'):
            subject = subject[:-1]

        if object.startswith('、'):
            object = object[1:]
        if object.endswith(',') or object.endswith('、') or object.endswith(',') or \
                object.endswith('。') or object.endswith(';') or object.endswith(':'):
            object = object[:-1]

        if len(subject) < 15:
            subject_list.append(subject)

        data = {
            'law_id': law_id,
            'chapter_id': chapter_id,
            'sentence_id': sentence_id,
            'parse_sentence': parse_sentence,
            'subject': subject,
            'relation': '权利/义务',
            'object': object
        }
        insert_new_relation_base_type(relation_type, data)
    subject_set = list(set(subject_list))
    insert_new_forestry_subject(subject_set)
Example #23
def law_classify():  # classify law/regulation text files
    dir_path = "C:\\Users\\dhz1216\\Desktop\\wenben"
    class_select_sql = "select * from law_class"  # query the law_class classification table
    law_select_sql = "select id from law where text_name = %s"  # look up the law id by text name
    update_sql = "update law set type = %s where id = %s"  # update the type column of law by id
    insert_sql = "insert into law_to_class (law_id, class_id) value (%s, %s)"  # insert into the law-to-class mapping table

    cursor = conn.cursor()
    cursor.execute(class_select_sql)
    results = cursor.fetchall()
    class_id_dict = dict()  # class name -> class id
    class_keyword_dict = dict()  # class name -> keyword list
    for res in results:
        class_id_dict.update({res[1]: res[0]})
        class_keyword_dict.update({res[1]: str(res[2]).split(',')})

    for file in os.listdir(dir_path):
        text_name = file.split('.')[0]
        cursor.execute(law_select_sql, (text_name, ))
        law = cursor.fetchone()
        if law is None:
            continue
        law_id = law[0]
        class_type = str()
        for c in class_id_dict:
            for word in class_keyword_dict[c]:
                if word in text_name:
                    class_type = c
                    break
            if class_type is not None and class_type != '':
                break

        if class_type is None or class_type == '':
            class_type = '其他'
        class_id = class_id_dict[class_type]  # id of the matched class

        try:
            cursor.execute(update_sql, (class_type, law_id))
            conn.commit()
            try:
                cursor.execute(insert_sql, (law_id, class_id))
                conn.commit()
            except Exception as e:
                conn.rollback()
                print('\033[1;32;41m' + text_name + str(e) +
                      ': INSERT FAILED---------' + '\033[0m')
            print(text_name + '--------success--------' + class_type)
        except Exception as e:
            conn.rollback()
            print('\033[1;32;41m' + text_name + str(e) +
                  ': UPDATE FAILED---------' + '\033[0m')
        print(text_name + '------------------' + class_type + str(class_id))
Example #24
def write_to_file_for_observe():
    query_for_parse_sentence = '''select complete_sentence, parse_sentence, class, sentence_id 
                                  from dependency_parsing_result group by parse_sentence order by id'''
    cursor = conn.cursor()
    cursor.execute(query_for_parse_sentence)
    parse_sentences = cursor.fetchall()

    with open(OUTPUT_FILE, "a") as w:
        for parse_sentence in parse_sentences:
            w.write("原句:" + parse_sentence[0] + '\n')
            w.write("解析:" + parse_sentence[1] + '\n')
            cursor.execute(
                SELECT_DP_SQL,
                (parse_sentence[1], parse_sentence[2], parse_sentence[3]))
            dp_results = cursor.fetchall()
            cursor.execute(
                SELECT_SDP_SQL,
                (parse_sentence[1], parse_sentence[2], parse_sentence[3]))
            sdp_results = cursor.fetchall()
            cursor.execute(
                SELECT_SRL_SQL,
                (parse_sentence[1], parse_sentence[2], parse_sentence[3]))
            srl_results = decode_srl_results(cursor.fetchall())
            w.write(
                "-----------------------------依存句法分析结果---------------------------\n"
            )
            for dp in dp_results:
                front_word = dp[7]
                relation_name = dp[8]
                tail_word = dp[9]
                w.write("%s -----(%s)---- %s\n" %
                        (front_word, relation_name, tail_word))
            w.write(
                "-----------------------------语义角色标注结果---------------------------\n"
            )
            for verb in srl_results:
                w.write(verb + ":\t")
                for role_info in srl_results[verb]:
                    w.write(role_info[0] + '-' + role_info[1] + '\t')
                w.write('\n')
            w.write(
                "-----------------------------语义依存分析结果---------------------------\n"
            )
            for sdp in sdp_results:
                front_word = sdp[7]
                relation_name = sdp[8]
                tail_word = sdp[9]
                w.write("%s -----(%s)---- %s\n" %
                        (front_word, relation_name, tail_word))
            w.write(
                "\n********************************************************************************************\n"
            )
def accord_relation_process():
    cursor = conn.cursor()
    select_sql = 'select * from accord_relation'
    cursor.execute(select_sql)
    results = cursor.fetchall()
    accord_list = []
    for res in results:
        relation = res[5]
        for accord in str(relation).split('/'):
            accord_list.append(accord)
    accord_set = set(accord_list)
    for accord in accord_set:
        print(accord)
def query_for_subject():
    cursor = conn.cursor()
    subject_list = []
    for class_type in SINGLE_RELATION_CLASS:
        table_name = class_type + '_relation'
        query_sql = 'select * from %s' % table_name
        cursor.execute(query_sql)
        results = cursor.fetchall()
        for relation in results:
            subject = relation[4]
            subject_list.append(subject)
    subject_set = set(subject_list)
    return subject_set
Example #27
def chapter_article_parser(file_path):      # parse and normalize texts organized into "第xx条" articles
    file_name = file_path.split('\\')[-1]
    write_path = "C:\\Users\\dhz1216\\Desktop\\washing\\第一类\\" + file_name
    # look up this text's id and law name in the base law table
    law_name = file_name.split('.')[0]
    try:
        cursor = conn.cursor()
        select_sql = "select id from law where text_name = %s"
        cursor.execute(select_sql, (file_name.split('.')[0], ))
        law_id = cursor.fetchone()[0]
    except Exception as e:
        print(e)
        return

    with open(file_path, "r", encoding='gbk', errors='ignore') as f:
        line = f.readline()
        pattern = re.compile("第(.*?)(?:章|条)")
        while line:
            if line.startswith('【法规全文】'):
                line = line.replace('【法规全文】', '')
                with open(write_path, "a") as w:
                    while line:
                        match = pattern.match(line.lstrip())
                        if match:
                            p_key = match.group()
                            p_content = line.replace(match.group(), '').lstrip()
                            line = f.readline()
                            while line:
                                match = pattern.match(line.lstrip())
                                if not match:
                                    p_content = p_content + line
                                    line = f.readline()
                                else:
                                    break
                            w.write(p_key + ':  ' + p_content + '\n')
                            insert_sql = "insert into law_content_parse (law_id, p_key, p_content, law_name) " \
                                         "value (%s, %s, %s, %s)"
                            try:
                                cursor.execute(insert_sql, (law_id, p_key, p_content, law_name))
                                conn.commit()
                                print(file_name + ': PARSE SUCCESS')
                            except Exception as e:
                                print(e)
                                conn.rollback()
                                print('\033[1;32;41m' + file_name + ': PARSE FAILED---------' + '\033[0m')
                        else:
                            line = f.readline()
            else:
                line = f.readline()
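chapter_article_parser applies the compiled pattern with match() on each stripped line, so it only fires on lines that begin with a "第…章" or "第…条" heading; an illustrative check (the sample line is made up):

import re

pattern = re.compile("第(.*?)(?:章|条)")
m = pattern.match("第一条 为了保护、培育和合理利用森林资源,制定本条例。".lstrip())
print(m.group())   # 第一条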
Example #28
def dp_based_similarity_core():
    cursor = conn.cursor()
    model = Word2Vec.load('../model/forestry_law.model')
    cn_reg = '^[\u4e00-\u9fa5]+$'
    select_sql = '''select * from dependency_parsing_result where parse_sentence = %s'''
    sentence1 = '各级林业主管部门负责木材经营加工的管理和监督。'
    # sentence2 = '市园林主管部门应负责组织城市园林病虫害防治工作。'
    sentence2 = '市园林主管部门负责监督和技术指导。'
    # sentence2 = '没有违法所得或者违法所得不足三万元的,并处三千元以上三万元以下罚款。'
    cursor.execute(select_sql, (sentence1, ))
    res1 = cursor.fetchall()
    group1 = []
    group2 = []
    for res in res1:
        front_word = res[7]
        relation = res[8]
        tail_word = res[9]
        if re.search(cn_reg, front_word) and re.search(cn_reg, tail_word):
            group1.append(tuple((front_word, relation, tail_word)))
        else:
            continue

    cursor.execute(select_sql, (sentence2, ))
    res2 = cursor.fetchall()
    for res in res2:
        front_word = res[7]
        relation = res[8]
        tail_word = res[9]
        if re.search(cn_reg, front_word) and re.search(cn_reg, tail_word):
            group2.append(tuple((front_word, relation, tail_word)))
        else:
            continue
    max_len = max(len(group1), len(group2))
    print(max_len)
    sim_score = 0
    for pair1 in group1:
        for pair2 in group2:
            if pair1[1] == pair2[1]:
                if model[pair1[0]].any() and model[pair1[2]].any() and model[
                        pair2[0]].any() and model[pair2[2]].any():
                    sim1 = model.similarity(pair1[0], pair2[0])
                    sim2 = model.similarity(pair1[2], pair2[2])
                    print(sim1)
                    print(sim2)
                    print('-----------------------------')
                    if sim1 > 0.35 and sim2 > 0.35:
                        sim_score = sim_score + 0.7 * (
                            (sim1 + sim2) / 2) / max_len
    print(sim_score)
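dp_based_similarity_core only rewards dependency pairs that share a relation label and whose head and tail word similarities both exceed 0.35; each such pair contributes 0.7 * ((sim1 + sim2) / 2) / max_len. The same rule pulled out as a standalone helper (the function name and sample numbers are mine, purely illustrative):

def pair_score(sim1, sim2, max_len, threshold=0.35, weight=0.7):
    # Contribution of one matching dependency pair to the overall sentence score.
    if sim1 > threshold and sim2 > threshold:
        return weight * ((sim1 + sim2) / 2) / max_len
    return 0.0

print(round(pair_score(0.8, 0.6, 10), 3))   # 0.049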
Example #29
def science_spot_parser(file_path):
    dir_path = "C:\\Users\\dhz1216\\Desktop\\washing\\风景名胜"
    file_name = file_path.split("\\")[-1]
    cursor = conn.cursor()
    select_sql = "select id, name from law where text_name = %s"
    cursor.execute(select_sql, (file_name.split('.')[0], ))
    law_id = cursor.fetchone()[0]
    count = 0
    with open(file_path, "r", encoding='gbk', errors='ignore') as f:
        line = f.readline()
        while line:
            if line.startswith('【法规全文】'):
                with open(dir_path + "\\" + file_name, "a") as w:
                    line = line.replace('【法规全文】', '')
                    # line = f.readline()
                    while line:
                        if len(line.lstrip().split(' ')) > 1:
                            key_title = line.lstrip().split(' ')[0]
                            value_content = line.lstrip().split(' ')[1]
                            line = f.readline()
                            while line:
                                if len(line.lstrip().split(' ')) <= 1:
                                    value_content = value_content + line.lstrip(
                                    ).split(' ')[0]
                                    line = f.readline()
                                else:
                                    break
                            w.write(key_title + ':' + value_content + '\n')
                            insert_sql = "insert into law_content (law_id, p_key, p_content, law_class) " \
                                         "value (%s, %s, %s, %s)"
                            try:
                                cursor.execute(
                                    insert_sql,
                                    (law_id, key_title, value_content, '风景名胜'))
                                conn.commit()
                                count = count + 1
                                print('\033[1;37;40m' + file_name +
                                      ': PARSE SUCCESS' + '\033[0m')
                            except Exception as e:
                                print(e)
                                conn.rollback()
                                print('\033[1;32;41m' + file_name +
                                      ': PARSE FAILED---------' + '\033[0m')

                        else:
                            line = f.readline()
            else:
                line = f.readline()
    print('共插入:' + str(count) + '条')
Example #30
def filter_subject():
    cursor = conn.cursor()
    select_sql = '''select * from new_forestry_subject'''
    insert_sql = '''insert into new_forestry_subject_final (subject) value (%s)'''
    cursor.execute(select_sql)
    results = cursor.fetchall()
    subject_list = []
    for res in results:
        subject = res[1]
        subject_list.append(subject)
    subject_set = list(set(subject_list))
    for s in subject_set:
        cursor.execute(insert_sql, (s, ))
        conn.commit()
        print(s, 'success!')