Exemple #1
0
def law_item_parse(item):
    """Parse one law article into condition/subject/behavior/result parts.

    ``item`` is stripped of ``<p>`` tags and ideographic spaces and split on
    ``</p>``.  The first fragment is the lead-in sentence; the remaining
    fragments are the enumerated sub-items.  The semantic-role output that
    ``ltp_tool`` returns for the lead-in sentence decides which
    ``st.SentenceTemplate`` is built, and that template's ``parse_items``
    supplies the returned values.

    Args:
        item: Raw article text made of ``<p>...</p>`` fragments.

    Returns:
        dict: keys ``'condition'``, ``'subject'``, ``'behavior'`` and
        ``'result'`` as produced by ``template.parse_items(items)``.  An
        empty dict is returned when ``first_item_filter`` yields nothing,
        when the SRL result is empty, when it contains no roles, or when the
        chosen template is falsy.
    """
    templates = dict()
    # str.split() always yields at least one element, so lines[0] exists.
    lines = item.strip().replace('<p>', '').replace('\u3000', '').split('</p>')
    first_item = item_title_filter(lines[0])
    items = [number_zh_filter(it) for it in lines[1:]]

    # Cheap structural check first: without the lead-in segments nothing can
    # be built, so the SRL call below would be wasted work.
    first_segs = first_item_filter(first_item)
    if not first_segs:
        return templates

    ltp_result_dict = ltp_tool(first_item, 'srl')
    if not ltp_result_dict:
        # Nothing came back from SRL; avoid indexing into an empty result.
        return templates

    # Id of the first segmented word found in key1 (expected to be the
    # keyword collection); 0 when there is no such word.
    seg = ltp_result_dict['seg']
    key_id = next((n['id'] for n in (seg or ()) if n['word'] in key1), 0)

    roles = ltp_result_dict['role']
    if not roles:
        return templates

    # Loop-invariant, so computed once instead of once per role.
    result = remove_special_character(first_segs[1])
    template = None
    # Roles are scanned from last to first.
    # NOTE(review): only an A0 match stops the scan; an A1 match can still be
    # overwritten by a later non-matching role. Confirm this precedence.
    for role in reversed(roles):
        role_type, beg, end = role['type'], role['beg'], role['end']
        ends_before_key = end < key_id
        if role_type == 'A0' and ends_before_key:
            # The words covered by the A0 role form the subject; the rest of
            # the first segment becomes the condition.
            sub = ''.join(n['word'] for n in seg[beg:end + 1])
            condition = remove_special_character(
                first_segs[0].replace(sub, ''))
            template = st.SentenceTemplate(subject=[sub],
                                           condition=condition,
                                           result=result,
                                           flag=0)
            break
        if role_type == 'A1' and ends_before_key:
            template = st.SentenceTemplate(
                subject=[],
                condition=remove_special_character(first_segs[0]),
                result=result,
                flag=1)
        else:
            # This fallback passes the second segment through unfiltered.
            template = st.SentenceTemplate(subject=[],
                                           condition='',
                                           result=first_segs[1],
                                           flag=1)

    if template:
        condition, subject, behavior, result = template.parse_items(items)
        templates['condition'] = condition
        templates['subject'] = subject
        templates['behavior'] = behavior
        templates['result'] = result
    return templates
Exemple #2
0
def law_item_parse_j(lines):
    """Parse one law article into condition/subject/behavior/result parts.

    The text is stripped of ``<p>`` tags and ideographic spaces and split on
    ``</p>``.  The first fragment is the explanatory lead-in sentence that
    precedes the numbered items; the following fragments are collected as
    items for as long as ``has_key_one`` accepts them.  The semantic-role
    output of ``ltp_tool`` for the lead-in sentence selects the
    ``st.SentenceTemplate`` whose fields are returned.

    Args:
        lines: Raw article text made of ``<p>...</p>`` fragments (rebound to
            the list of fragments inside the function).

    Returns:
        dict: keys ``'condition'``, ``'subject'``, ``'behavior'`` (the
        cleaned items) and ``'result'``.  An empty dict is returned when
        ``first_item_filter`` yields nothing, when the SRL result is empty,
        when it contains no roles, or when the chosen template is falsy.

    Side effects:
        Rebinds the module-level name ``template`` to the template chosen.
        NOTE(review): kept for compatibility; confirm whether any caller
        reads it.
    """
    global template
    templates = dict()
    # Split on </p>; str.split() always yields at least one element.
    lines = lines.strip().replace('<p>', '').replace('\u3000',
                                                     '').split('</p>')
    # Lead-in text, with article headings removed by item_title_filter.
    first_item = item_title_filter(lines[0])

    # Collect the leading run of fragments recognised by has_key_one; the
    # first fragment that is not recognised ends the list.
    items = []
    for word in lines[1:]:
        if not has_key_one(word):
            break
        items.append(word)
    # Clean each item and strip its enumeration marker.
    items = [
        number_zh_filter(remove_special_character(it)) for it in items
    ]

    # Splits the lead-in sentence into its two parts; checked before the SRL
    # call so that call is skipped when there is nothing to build.
    first_segs = first_item_filter(first_item)
    if not first_segs:
        return templates

    # SRL result: 'seg' holds the words, 'role' holds spans delimited by
    # 'beg'/'end' with a 'type'.
    ltp_result_dict = ltp_tool(first_item, 'srl')
    if not ltp_result_dict:
        # Nothing came back from SRL; avoid indexing into an empty result.
        return templates

    # Id of the first segmented word found in key1; 0 when there is none.
    seg = ltp_result_dict['seg']
    key_id = next((n['id'] for n in (seg or ()) if n['word'] in key1), 0)

    roles = ltp_result_dict['role']
    if not roles:
        return templates

    # Loop-invariant, so computed once instead of once per role.
    result = remove_special_character(first_segs[1])
    # Roles are scanned from last to first; every iteration binds
    # ``template``, so it is always set once the loop has run.
    # NOTE(review): only an A0 match stops the scan; an A1 match can still be
    # overwritten by a later non-matching role. Confirm this precedence.
    for role in reversed(roles):
        role_type, beg, end = role['type'], role['beg'], role['end']
        ends_before_key = end < key_id
        if role_type == 'A0' and ends_before_key:
            # The words covered by the A0 role form the subject; the rest of
            # the first segment becomes the condition.
            sub = ''.join(n['word'] for n in seg[beg:end + 1])
            condition = remove_special_character(
                first_segs[0].replace(sub, ''))
            template = st.SentenceTemplate(subject=sub,
                                           condition=condition,
                                           result=result,
                                           flag=0)
            break
        if role_type == 'A1' and ends_before_key:
            template = st.SentenceTemplate(
                subject='',
                condition=remove_special_character(first_segs[0]),
                result=result,
                flag=1)
        else:
            template = st.SentenceTemplate(subject='',
                                           condition='',
                                           result=result,
                                           flag=1)

    if template:
        # The cleaned items are returned as the behaviors unchanged.
        beh = list(items)
        templates['condition'] = template.condition
        templates['subject'] = template.subject
        templates['behavior'] = beh
        templates['result'] = template.result
    return templates
Exemple #3
0
def law_item_parse_j(lines):
    """Parse one law article into condition/subject/key/behavior/result.

    The text is stripped of ``<p>`` tags and ideographic spaces and split on
    ``</p>``.  The first fragment is the explanatory lead-in sentence; the
    following fragments are collected as items for as long as one of
    ``has_key_one``, ``has_key_one_v2`` or ``has_key_one_v4`` accepts them.
    A ``st.SentenceTemplate`` is built from the lead-in sentence (either via
    the ``check_sub_v2`` special case or from the SRL roles returned by
    ``ltp_tool``), and each item is then turned into a behavior and,
    where ``get_result_from_beh`` finds one, a per-item result.

    Args:
        lines: Raw article text made of ``<p>...</p>`` fragments (rebound to
            the list of fragments inside the function).

    Returns:
        dict: keys ``'condition'``, ``'subject'``, ``'key'``, ``'behavior'``
        and ``'result'``.  When per-item results were extracted, ``'result'``
        is that list and ``'condition'`` is the template condition followed
        by the template result; otherwise ``'result'`` is the template result
        with the subject text removed.  An empty dict is returned when the
        lead-in sentence cannot be split, the SRL result is empty or has no
        roles, or no template could be built.

    Side effects:
        Rebinds the module-level name ``template`` when a template is built
        (NOTE(review): kept for compatibility; confirm whether any caller
        reads it).  Exceptions raised while building the special-case
        template or while processing an item are swallowed and reported via
        ``write_to_file_append(lines, 'model_1_error.txt')``.
    """
    global template
    templates = dict()
    result_list = []
    key_list = []
    # Behavior reused for an item that yields no behavior text of its own.
    last_behavior = ''
    key_item = ''
    # Trailing part split off the lead-in sentence by second_item_filter;
    # it is prefixed to every item behavior.
    last_beh = ''
    # Split on </p>; str.split() always yields at least one element.
    lines = lines.strip().replace('<p>', '').replace('\u3000',
                                                     '').split('</p>')
    # Lead-in text, with article headings removed by item_title_filter.
    first_item = item_title_filter(lines[0])

    # Collect the leading run of fragments that carry one of the recognised
    # enumeration markers; the first unrecognised fragment ends the list.
    items = []
    for word in lines[1:]:
        if not (has_key_one(word) or has_key_one_v2(word)
                or has_key_one_v4(word)):
            break
        items.append(word)
    # Clean each item and strip its enumeration marker.
    items = [
        number_zh_filter_plus(
            number_zh_filter(remove_special_character(it))) for it in items
    ]

    # Splits the lead-in sentence into its two parts; checked before the SRL
    # call so that call is skipped when there is nothing to build.
    first_segs = first_item_filter(first_item)
    if not first_segs:
        return templates

    # SRL result: 'seg' holds the words, 'role' holds spans delimited by
    # 'beg'/'end' with a 'type'.
    ltp_result_dict = ltp_tool(first_item, 'srl')
    if not ltp_result_dict:
        return templates

    # Locate the key word.  A word from ``key`` only moves key_id and clears
    # key_item; the first word that is in ``key2`` (and not in ``key``)
    # becomes key_item and ends the search.
    seg = ltp_result_dict['seg']
    key_id = 0
    for token in (seg or ()):
        token_word = token['word']
        if token_word in key:
            key_id = token['id']
            key_item = ''
        elif token_word in key2:
            key_id = token['id']
            key_item = token_word
            break

    roles = ltp_result_dict['role']
    if not roles:
        return templates

    # Second lead-in segment with the key word removed; loop-invariant, so
    # computed once instead of once per role.
    result = remove_special_character(first_segs[1]).replace(key_item, '')

    # ``built`` is local so that a failure below can never fall back to a
    # template left in the module-level name by an earlier call.
    built = None
    if check_sub_v2(first_segs[0]):
        # Special case, checked before the roles.  It does not depend on the
        # role being inspected, so it is evaluated once rather than per role.
        try:
            condition = remove_special_character(
                first_segs[0].replace(key_item, ''))
            second_result = second_item_filter(first_segs[0])
            if second_result:
                condition = remove_special_character(second_result[0])
                last_beh = second_result[1]
            built = st.SentenceTemplate(subject='',
                                        condition=condition,
                                        result=result,
                                        flag=0)
        except Exception:
            write_to_file_append(lines, 'model_1_error.txt')
    else:
        # Roles are scanned from last to first.
        # NOTE(review): only an A0 match stops the scan; an A1 match can
        # still be overwritten by a later non-matching role.
        for role in reversed(roles):
            role_type, beg, end = role['type'], role['beg'], role['end']
            ends_before_key = end < key_id
            if role_type == 'A0' and ends_before_key:
                # The words covered by the A0 role form the subject, unless
                # that text is listed in sub_list_2.
                sub = ''.join(n['word'] for n in seg[beg:end + 1])
                if sub in sub_list_2:
                    sub = ''
                condition = remove_special_character(
                    first_segs[0].replace(sub, ''))
                second_result = second_item_filter(condition)
                if second_result:
                    condition = remove_special_character(second_result[0])
                    last_beh = second_result[1]
                built = st.SentenceTemplate(subject=sub,
                                            condition=condition,
                                            result=result,
                                            flag=0)
                break
            if role_type == 'A1' and ends_before_key:
                built = st.SentenceTemplate(
                    subject='',
                    condition=remove_special_character(first_segs[0]),
                    result=result,
                    flag=1)
            else:
                built = st.SentenceTemplate(subject='',
                                            condition='',
                                            result=result,
                                            flag=1)

    if built is None:
        return templates
    template = built
    if not template:
        return templates

    beh = []
    for tiao in items:
        try:
            get_result = get_sentence_key(tiao)
            if get_result:
                # Use the key extracted for THIS item.  Indexing key_list by
                # the item position breaks as soon as an earlier item failed
                # before appending its key.
                item_key = ''.join(get_result[0])
                key_list.append(item_key)
                behavior = last_beh + remove_last_de(
                    tiao.replace(item_key, ''))
                holds_result = (filter_key_one_behv(behavior)
                                and template.subject not in sub_list)
            else:
                if key_item == '由':
                    key_item = ''
                key_list.append(key_item)
                behavior = last_beh + remove_last_de(tiao)
                holds_result = (
                    filter_key_one_behv(behavior)
                    and filter_key_one_behv_plus(behavior)
                    and template.subject not in sub_list
                    and template.condition not in condition_list_2)

            if not holds_result:
                beh.append(behavior)
                continue

            # The behavior text also carries a result: split the two apart.
            filter_result = get_result_from_beh(behavior)
            if not filter_result:
                # NOTE(review): nothing is recorded for this item although
                # its key was already appended to key_list.
                continue
            pre_behavior = ''
            if filter_result[0]:
                pre_behavior = remove_last_de(
                    remove_special_character(filter_result[0]))
            if not pre_behavior:
                pre_behavior = last_behavior
            beh.append(pre_behavior)
            if filter_result[2]:
                result_list.append(filter_result[1] + filter_result[2])
            else:
                result_list.append(filter_result[1])
            last_behavior = pre_behavior
        except Exception:
            write_to_file_append(lines, 'model_1_error.txt')

    # Drop conditions that are at most one character long or listed in
    # condition_list.
    if len(template.condition) <= 1 or template.condition in condition_list:
        template.condition = ''

    if result_list:
        condition = template.condition + template.result
        result = result_list
    else:
        condition = template.condition
        result = template.result.replace(template.subject, '')
    templates['condition'] = condition
    templates['subject'] = template.subject
    templates['key'] = key_list
    templates['behavior'] = beh
    templates['result'] = result
    return templates