Python re_findone Beispiele, i_entity_extractor.common_parser_lib.toolsutil.re_findone Python Beispiele

Beispiel #1

0

Datei anzeigen

 def get_court2nd(self, content):
     """Extract the court from ``content`` using ``self.court_pattern2nd``.

     Args:
         content: Text to search; it is coerced to ``unicode`` before matching.

     Returns:
         The value produced by ``toolsutil.re_findone`` when it is truthy,
         otherwise an empty string.
     """
     # The previous implementation repeated the very same lookup (same
     # pattern, same text) when the first attempt found nothing, so the
     # retry could never succeed.  One lookup is sufficient.
     # NOTE(review): if the retry was meant to use a different pattern,
     # add that fallback here.
     res = toolsutil.re_findone(self.court_pattern2nd, unicode(content))
     return res if res else ''

Beispiel #2

0

Datei anzeigen

    def get_money_list(self, judge_content):
        """Return the largest and the total amount of money found in a text.

        Each segment of the text (split on commas and the Chinese full stop)
        is searched with ``self.money_regex_chs`` and the hit is converted by
        ``money_parser.trans_chs_money``.  The whole text is searched with
        ``self.money_regex`` and every hit is converted by
        ``money_parser.transfer_money``.  ``self.money_regex_last`` is only
        consulted when neither search produced an amount.

        Args:
            judge_content: Text to scan.

        Returns:
            A ``(max_money, sum_money)`` tuple; both are ``0`` when no amount
            was found.
        """
        full_text = unicode(judge_content)
        segments = toolsutil.my_split(judge_content, [',', '，', '。'])
        digit_hits = toolsutil.re_findall(self.money_regex, full_text)

        amounts = []

        # Amounts matched segment by segment come first.
        for segment in segments:
            chs_hit = toolsutil.re_findone(self.money_regex_chs,
                                           unicode(segment))
            if not chs_hit:
                continue
            converted = self.money_parser.trans_chs_money(chs_hit)
            amounts.append(float(converted[0]))

        # Then the amounts matched over the whole text.
        for hit in digit_hits or []:
            converted = self.money_parser.transfer_money(hit)
            amounts.append(float(converted[0]))

        # Last-resort pattern, used only when nothing was found so far.
        if not amounts:
            fallback_hits = toolsutil.re_findall(self.money_regex_last,
                                                 full_text)
            for hit in fallback_hits or []:
                converted = self.money_parser.transfer_money(hit)
                amounts.append(float(converted[0]))

        if not amounts:
            return 0, 0
        return max(amounts), sum(amounts)

Beispiel #3

0

Datei anzeigen

    def format_extract_data(self, extract_data, topic_id):
        """Return a copy of ``extract_data`` with money totals and province added.

        The "duty" field (spaces removed) is scanned for amounts of money.
        The result gains three keys: "max_money", "sum_money" (both ``0``
        when no amount is found) and "province", which is derived from the
        "court" field.

        Args:
            extract_data: Mapping holding the extracted fields.
            topic_id: Not used by this method.

        Returns:
            A deep copy of ``extract_data`` carrying the additional keys.
        """
        entity_data = copy.deepcopy(extract_data)

        duty_text = unicode(extract_data.get("duty", "").replace(" ", ""))
        segments = toolsutil.my_split(duty_text, [',', '，', '。'])
        digit_hits = toolsutil.re_findall(self.money_regex, duty_text)

        amounts = []

        # Amounts matched segment by segment.
        for segment in segments:
            chs_hit = toolsutil.re_findone(self.money_regex_chs,
                                           unicode(segment))
            if not chs_hit:
                continue
            converted = self.parser_tool.money_parser.trans_chs_money(chs_hit)
            amounts.append(float(converted[0]))

        # Amounts matched over the whole text.
        for hit in digit_hits or []:
            converted = self.parser_tool.money_parser.transfer_money(hit)
            amounts.append(float(converted[0]))

        # First fallback: a looser pattern over the whole text.
        if not amounts:
            fallback_hits = toolsutil.re_findall(self.money_regex_last,
                                                 duty_text)
            for hit in fallback_hits or []:
                converted = self.parser_tool.money_parser.transfer_money(hit)
                amounts.append(float(converted[0]))

        # Second fallback: accepted only when the match is the entire text.
        if not amounts:
            whole = toolsutil.re_findone(self.money_regex3, duty_text)
            if whole and whole == duty_text:
                amounts.append(float(whole))

        if amounts:
            max_money, sum_money = max(amounts), sum(amounts)
        else:
            max_money, sum_money = 0, 0

        province = self.parser_tool.province_parser.get_province(
            entity_data.get("court", ""))

        entity_data["max_money"] = max_money
        entity_data["sum_money"] = sum_money
        entity_data["province"] = province
        return entity_data

Beispiel #4

0

Datei anzeigen

 def get_court(self, content):
     """Return the court found in ``content``.

     The dedicated court parser is tried first; ``self.court_regex`` is the
     fallback.  An empty string is returned when neither finds anything.
     """
     parsed = self.parser_tool.court_parser.get_court(content)
     if parsed:
         return parsed
     fallback = toolsutil.re_findone(self.court_regex, content)
     return fallback if fallback else ""

Beispiel #5

0

Datei anzeigen

    def format_extract_data(self, extract_data, topic_id):
        """Return a copy of ``extract_data`` with derived entity fields.

        * When a "code" field exists it is matched against
          ``self.public_sector_regex`` and the hit is translated through
          ``self.public_sector_dict`` into "public_sector".
        * When an "info" field exists it is removed from the copy and its
          items (paired "key<suffix>" / "value<suffix>" fields) are mapped
          to entity fields through ``self.mapping_conf``.

        Args:
            extract_data: Mapping holding the extracted fields.
            topic_id: Not used by this method.

        Returns:
            The enriched deep copy.
        """
        entity_data = copy.deepcopy(extract_data)

        if "code" in entity_data:
            sector = toolsutil.re_findone(self.public_sector_regex,
                                          entity_data.get("code"))
            if sector:
                entity_data["public_sector"] = self.public_sector_dict.get(
                    sector)

        if "info" not in entity_data:
            return entity_data
        entity_data.pop("info")

        for item in extract_data["info"]:
            for field_name in item:
                value_field = "value" + field_name[3:]
                if field_name[:3] != "key" or value_field not in item:
                    continue

                label = item[field_name].encode("utf8")
                text = item[value_field].encode("utf8")

                # The raw pair is always tried; tab-separated labels/values
                # of matching length are additionally tried one by one.
                pairs = [(label, text)]
                labels = label.split("\t")
                texts = text.split("\t")
                if len(labels) > 1 and len(labels) == len(texts):
                    pairs.extend(zip(labels, texts))

                for pair_label, pair_text in pairs:
                    for key_conf, value_conf in self.mapping_conf.items():
                        if key_conf not in pair_label:
                            continue
                        if unicode(key_conf) == u"注册资本":
                            # Registered capital: parse label + value as money.
                            pair_text, unit = self.parser_tool.money_parser.transfer_money(
                                pair_label + pair_text)
                        entity_data[value_conf] = pair_text
                        # Only the first matching mapping entry is applied.
                        break

        return entity_data

Beispiel #6

0

Datei anzeigen

 def get_court_place(self, content):
     """Return the hearing location found in ``content``.

     The dedicated parser is asked first.  If it yields nothing, the text
     is split on ``self.seps`` and each segment is matched against
     ``self.court_place_regex``; the first non-empty element of a match is
     kept.  Once a candidate shorter than ``self.court_place_len`` is held,
     the strings in ``ktgg_conf.court_place_replace_str_list`` are removed
     from it and the search stops.
     """
     text = unicode(content).replace(" ", "")
     segments = toolsutil.my_split(text, self.seps)
     court_place = self.parser_tool.court_place_parser.get_court_place(text)
     if court_place:
         return court_place

     for segment in segments:
         candidates = toolsutil.re_findone(self.court_place_regex,
                                           unicode(segment))
         if candidates:
             # Keep the previous value when every element is empty.
             court_place = next((c for c in candidates if c), court_place)

         if not court_place or len(court_place) >= self.court_place_len:
             continue

         for noise in ktgg_conf.court_place_replace_str_list:
             court_place = court_place.replace(noise, '')
         break
     return court_place

Beispiel #7

0

Datei anzeigen

Datei: fygg_parser.py Projekt: mylove1/crawler-2

    def format_litigant(self, input_plaintiff_list, input_defendant_list,
                        replace_str_list):
        """Clean up plaintiff and defendant names.

        Every name has full-width parenthesised text removed, every string
        in ``replace_str_list`` stripped out, and is kept only if its length
        lies between ``self.min_litigant_len`` and ``self.max_litigant_len``
        (inclusive).  Defendants that also appear among the plaintiffs are
        dropped, as are defendants that match ``self.case_id_regex`` or
        contain the word for "court" (法院).  Remaining defendants get ASCII
        parentheses converted to full-width ones and parenthesised text
        removed once more.

        Args:
            input_plaintiff_list: Raw plaintiff names.
            input_defendant_list: Raw defendant names.
            replace_str_list: Substrings to delete from every name.

        Returns:
            A ``(plaintiff_list, defendant_list)`` tuple of cleaned names.
        """
        # Same pattern as before; the backslash is escaped so the non-raw
        # literal no longer contains the invalid "\S" escape sequence.
        bracket_pattern = u'（\\S+）'

        def _normalise(names):
            """Apply the shared clean-up and length filter to ``names``."""
            kept = []
            for name in names:
                name = re.sub(bracket_pattern, '', unicode(name))
                for replace_str in replace_str_list:
                    name = name.replace(replace_str, "")
                if (self.min_litigant_len <= len(unicode(name)) <=
                        self.max_litigant_len):
                    kept.append(name)
            return kept

        plaintiff_list = [x for x in _normalise(input_plaintiff_list) if x]
        defendant_list = [
            x for x in _normalise(input_defendant_list)
            if x and x not in plaintiff_list
        ]

        new_defendant_list = []
        for defendant in defendant_list:
            # Empty names and plaintiff duplicates were already filtered out
            # above, so only the case-id and court checks remain.
            if toolsutil.re_findone(self.case_id_regex, defendant):
                continue
            if u'法院' in defendant:
                continue
            defendant = defendant.replace('(', '（').replace(')', '）')
            new_defendant_list.append(re.sub(bracket_pattern, '', defendant))

        return plaintiff_list, new_defendant_list

Beispiel #8

0

Datei anzeigen

    def before_parser(self, extract_data):
        """Apply province-specific pre-processing before parsing.

        * Henan (河南): "content" is replaced by the record's "title".
        * Hainan (海南) / Liaoning (辽宁): when the content contains the
          "基本案情" marker, labelled fields (hearing time and place, case id,
          court, judge, plaintiffs, defendants and the case description)
          are pulled out of the content into their own keys.
        * Anything else is passed through untouched.

        Args:
            extract_data: Mapping holding the extracted fields; it is
                modified in place.

        Returns:
            A one-element list containing ``extract_data``.
        """
        province = extract_data.get('province', '')
        content = extract_data.get('content', '')
        self.log.info("%sktgg_common_parser" % province)

        if province in [u'河南']:
            extract_data["content"] = extract_data.get("title", "")
        elif province in [u"海南", u"辽宁"] and u'基本案情' in unicode(content):
            text = unicode(content)

            court_time = toolsutil.re_findone(
                re.compile(u'开庭时间[:：]{0,1}(\S+)'), text)
            if court_time:
                extract_data["court_time"] = toolsutil.norm_date_time(
                    court_time)

            court_place = toolsutil.re_findone(
                re.compile(u'开庭地点[:：]{0,1}(\S+庭)'), text)
            if court_place:
                extract_data["court_place"] = court_place

            case_id_content = toolsutil.re_findone(
                re.compile(u'案号[:：]{0,1}(\S+)'), text)
            if case_id_content:
                extract_data["case_id"] = (
                    self.parser_tool.caseid_parser.get_case_id(
                        case_id_content))

            court = toolsutil.re_findone(
                re.compile(u'法院[:：]{0,1}(\S+)'), text)
            if court:
                extract_data["court"] = court

            # These patterns have several alternatives, so the match is
            # reduced to a single value by get_value_from_tuple.
            alternative_fields = (
                ("judge", u'审判长[:：]{0,1}(\S+)|主办人[:：]{1,2}(\S+)'),
                ("plaintiffs",
                 u'原告人[:：]{0,1}(\S+)|公诉人[:：]{1,2}(\S+)|原告[:：]{1,2}(\S+)'),
                ("defendants", u'被告人[:：]{0,1}(\S+)|被告[:：]{1,2}(\S+)'),
            )
            for field, pattern in alternative_fields:
                matched = toolsutil.re_findone(re.compile(pattern), text)
                value = self.get_value_from_tuple(matched) if matched else ""
                if value:
                    extract_data[field] = value

            description = toolsutil.re_findone(
                re.compile(u'基本案情[:：]{0,1}(\S+)'), text)
            if description:
                extract_data["content"] = description

        return [extract_data]

Beispiel #9

0

Datei anzeigen

    def get_data_from_content(self, extract_data):
        """Fill in missing case fields by parsing the record's "content" text.

        Fields already present in ``extract_data`` are kept (the case cause
        and court are lightly normalised); missing ones are derived from the
        content: case cause, case id, court, judge, hearing time, hearing
        place and finally the litigants.

        Args:
            extract_data: Mapping holding the extracted fields; it is
                modified in place.  Its "content" entry is expected to be
                a string.

        Returns:
            Whatever ``self.get_entity_data`` returns for the completed
            record and the litigant lists.
        """
        src_content = extract_data.get("content")
        content = unicode(src_content).replace(" ", "")
        content_list = toolsutil.my_split(content, self.seps)
        content = self.norm_content(content)

        # 1. Case cause
        case_cause_list = []
        if "case_cause" not in extract_data:
            case_cause_list = self.parser_tool.case_cause_parser.get_case_causes(
                content)
            extract_data["case_cause"] = ','.join(case_cause_list)
        else:
            extract_data["case_cause"] = extract_data["case_cause"].replace(
                u"一案", "")
            case_cause_list.append(extract_data["case_cause"])

        # 2. Case id
        if "case_id" not in extract_data:
            extract_data["case_id"] = self.parser_tool.caseid_parser.get_case_id(
                content)

        # 3. Court: parse it, or keep the part after the last colon
        if "court" not in extract_data:
            extract_data["court"] = self.get_court(content)
        else:
            court_list = toolsutil.my_split(extract_data.get("court", ""),
                                            ['：', ':'])
            if len(court_list) > 0:
                extract_data["court"] = court_list[-1]

        # 4. Judge: first line of the raw content that matches
        if "judge" not in extract_data:
            judge = ""
            src_content = unicode(src_content)
            tmp_content_list = toolsutil.my_split(src_content,
                                                  ['\r\n', '\r', '\n'])
            for row_content in tmp_content_list:
                judge = toolsutil.re_findone(self.judge_regex,
                                             unicode(row_content))
                if judge:
                    judge_list = toolsutil.my_split(judge, [' ', '，', '：'])
                    judge = ','.join(judge_list)
                    break
                else:
                    judge = ""
            extract_data["judge"] = judge

        # 5. Hearing time: first segment that matches.
        # The loop used to break unconditionally after the first segment,
        # so later segments were never examined; it now stops only on a hit
        # and stores "" when nothing matches.
        if "court_time" not in extract_data:
            court_time = ""
            for row_content in content_list:
                found = toolsutil.re_findone(self.court_time_regex,
                                             unicode(row_content))
                if found:
                    for week in ktgg_conf.week_day_list:
                        found = found.replace(week, " ")
                    court_time = found
                    break
            extract_data["court_time"] = court_time

        # 6. Hearing place
        if "court_place" not in extract_data:
            extract_data["court_place"] = self.get_court_place(content)

        # 7. Litigants / plaintiffs / defendants
        # --- locate the text that names the litigants
        litigants = ''
        find_flag = False
        src_content = src_content.replace(" ", "")
        tmp_content_list = toolsutil.my_split(src_content, ['\r', '\n', '。'])
        for litigant_regex in self.litigant_regex_list:
            for row_content in tmp_content_list:
                litigants = toolsutil.re_findone(litigant_regex,
                                                 unicode(row_content))
                if litigants:
                    find_flag = True
                    break
            if find_flag:
                break
            else:
                litigants = ""

        # No pattern matched: fall back to the whole normalised content.
        if litigants == "":
            litigants = content

        # --- split the litigant text into plaintiffs and defendants
        plaintiffs, defendants, litigants = self.get_plaintiff_defendant(
            litigants, extract_data.get("case_id", ""),
            extract_data.get("court", ""), case_cause_list)
        if litigants:
            litigant_list, litigants, plaintiff_list, defendant_list = self.format_litigants(
                plaintiffs, defendants, litigants)
        else:
            # Three independent lists; a chained assignment would make all
            # three names refer to the same list object.
            litigant_list, plaintiff_list, defendant_list = [], [], []

        for item in self.strip_list:
            extract_data['content'] = extract_data.get('content').replace(
                item, ' ')
        return self.get_entity_data(extract_data, litigant_list, litigants,
                                    plaintiff_list, defendant_list)

Beispiel #10

0

Datei anzeigen

    def format_extract_data(self, extract_data, topic_id):
        '''Normalise extracted company-registration data into an entity record.

        Works on a deep copy of ``extract_data`` and, in order:

        * drops keys listed in ``gsxx_conf.gsxx_key_list`` whose value is None;
        * flattens the "base_info" key/value items through ``self.mapping_conf``;
        * when any code field is present, normalises shareholder and
          contributor details, the registration codes, registered capital,
          the business period and province/city;
        * normalises change records (collecting former company names),
          invested companies, investor changes and the business status;
        * blanks an over-long "registered_code" and predicts a missing
          "industry";
        * removes every entry whose value is None or a blank string.

        Args:
            extract_data: Mapping holding the extracted fields.  A falsy
                value yields an empty dict.
            topic_id: Not used by this method.

        Returns:
            The normalised entity dict.
        '''

        # Assigned below but never read inside this method.
        is_baseinfo_page = False

        entity_data = {}
        if extract_data:
            entity_data = copy.deepcopy(extract_data)

            # Drop configured keys that carry no value.
            for key in gsxx_conf.gsxx_key_list:
                if entity_data.has_key(key) and entity_data.get(key) == None:
                    entity_data.pop(key)

            if entity_data.has_key("base_info"):
                is_baseinfo_page = True
                entity_data.pop("base_info")
                # Each item pairs a "key<suffix>" field with a
                # "value<suffix>" field; they are matched up by suffix.
                for item in extract_data["base_info"]:
                    for key, value in item.items():
                        base_key = key[:3]
                        base_value = "value" + key[3:]
                        if base_key == "key" and item.has_key(base_value):
                            key = item[key].encode("utf8")
                            if item.get(base_value) == None:
                                continue
                            value = item.get(base_value, "").encode("utf8")
                            # The raw pair is tried first; tab-separated
                            # keys/values of equal length are also tried
                            # one by one.
                            key_values = [(key, value)]
                            key_pars = key.split("\t")
                            value_pars = value.split("\t")
                            if len(key_pars) > 1 and len(key_pars) == len(
                                    value_pars):
                                index = 0
                                while index < len(key_pars):
                                    key_values.append(
                                        (key_pars[index], value_pars[index]))
                                    index += 1
                            for key, value in key_values:
                                value = value.strip()
                                if key in self.mapping_conf:
                                    # An already present field is not overwritten.
                                    if not entity_data.has_key(
                                            self.mapping_conf[key]):
                                        entity_data[
                                            self.mapping_conf[key]] = value
                                    # Codes of 18+ characters are stored as the
                                    # unified social credit code, shorter ones
                                    # as the registration code.
                                    if self.mapping_conf[key] == 'code':
                                        if value != None and len(value) >= 18:
                                            entity_data[
                                                "unified_social_credit_code"] = value
                                        else:
                                            entity_data[
                                                "registered_code"] = value
                                    # Stop at the first pair whose key is mapped.
                                    break
            company = entity_data.get("company", "")
            # Check whether this page is the base info page:
            # the presence of any code field is taken as the indicator.
            if entity_data.has_key("unified_social_credit_code") or \
                    entity_data.has_key("registered_code") or \
                    entity_data.has_key("code"):
                is_baseinfo_page = True
                if company == "":
                    self.log.error("base info without company " +
                                   json.dumps(entity_data))

                # Shareholders: run the nested subscription / paid-in details
                # through deal_data and deal_time, then the list itself
                # through deal_data.
                if entity_data.has_key("shareholder_information"):
                    shareholder_information = entity_data.get(
                        "shareholder_information")
                    new_shareholder_information = []
                    for each in shareholder_information:
                        if each.has_key("subscription_detail") and each.get(
                                "subscription_detail") != None:
                            each["subscription_detail"] = self.deal_data(
                                each.get("subscription_detail", []),
                                ["subscription_amount"])
                            each["subscription_detail"] = self.deal_time(
                                each.get("subscription_detail", []), [
                                    "subscription_time",
                                    "subscription_publish_time"
                                ])

                        if each.has_key("paied_detail"
                                        ) and each.get("paied_detail") != None:
                            each["paied_detail"] = self.deal_data(
                                each.get("paied_detail", []), ["paied_amount"])
                            each["paied_detail"] = self.deal_time(
                                each.get("paied_detail", []),
                                ["paied_time", "paied_publish_time"])

                        new_shareholder_information.append(each)

                    entity_data["shareholder_information"] = self.deal_data(
                        new_shareholder_information,
                        ["subscription_amount", "paied_amount"])

                # Contributors: same treatment as the shareholders above
                # (the local variable names are reused).
                if entity_data.has_key("contributor_information"):
                    shareholder_information = entity_data.get(
                        "contributor_information")
                    new_shareholder_information = []
                    for each in shareholder_information:
                        if each.has_key("subscription_detail") and each.get(
                                "subscription_detail") != None:
                            each["subscription_detail"] = self.deal_data(
                                each.get("subscription_detail", []),
                                ["subscription_amount"])
                            each["subscription_detail"] = self.deal_time(
                                each.get("subscription_detail", []), [
                                    "subscription_time",
                                    "subscription_publish_time"
                                ])

                        if each.has_key("paied_detail"
                                        ) and each.get("paied_detail") != None:
                            each["paied_detail"] = self.deal_data(
                                each.get("paied_detail", []), ["paied_amount"])
                            each["paied_detail"] = self.deal_time(
                                each.get("paied_detail", []),
                                ["paied_time", "paied_publish_time"])

                        new_shareholder_information.append(each)

                    entity_data["contributor_information"] = self.deal_data(
                        new_shareholder_information,
                        ["subscription_amount", "paied_amount"])

                # NOTE(review): only exactly 18 characters count as a unified
                # code here, whereas the base_info branch above accepts >= 18.
                if entity_data.has_key("code"):
                    value = entity_data["code"]
                    # entity_data.pop("code")
                    if len(value) == 18:
                        entity_data["unified_social_credit_code"] = value
                    else:
                        entity_data["registered_code"] = value

                # Registered capital: keep the source text in
                # "src_registered_capital" and store the parsed amount and
                # unit separately.
                src_registered_capital = entity_data.get(
                    'src_registered_capital')
                if not src_registered_capital:
                    src_registered_capital = entity_data.get(
                        'registered_capital')
                if src_registered_capital:
                    entity_data[
                        'src_registered_capital'] = src_registered_capital
                    registered_capital, registered_capital_unit = self.parser_tool.money_parser.transfer_money(
                        src_registered_capital)
                    entity_data["registered_capital"] = registered_capital
                    entity_data[
                        "registered_capital_unit"] = registered_capital_unit

                # Merge "period_from" / "period_to" into one "period" string.
                # NOTE(review): when "period_to" exists but is None, no
                # "period" is written in this branch.
                if entity_data.has_key("period_from"):
                    start_time = self.parser_tool.date_parser.get_date_list(
                        entity_data["period_from"])
                    entity_data.pop("period_from")
                    if entity_data.has_key("period_to"):
                        if entity_data.get("period_to") == None:
                            entity_data.pop("period_to")
                        else:
                            end_time = self.parser_tool.date_parser.get_date_list(
                                entity_data["period_to"])
                            entity_data["period"] = toolsutil.norm_date(
                                start_time) + u"至" + toolsutil.norm_date(
                                    end_time)
                            entity_data.pop("period_to")
                    else:
                        entity_data["period"] = toolsutil.norm_date(
                            start_time) + u"至"

                # Re-normalise "period": a two-element match gives start and
                # end, the second pattern gives a start only, otherwise a
                # placeholder is stored.
                if entity_data.has_key("period"):
                    period = entity_data.get("period", "")
                    ret = toolsutil.re_findone(self.period_regex, period)
                    if ret and len(ret) == 2:
                        start_time = toolsutil.norm_date(ret[0])
                        end_time = toolsutil.norm_date(ret[1])
                        period = start_time + u"至" + end_time
                    else:
                        ret2 = toolsutil.re_findone(self.period_regex2, period)
                        if ret2:
                            start_time = toolsutil.norm_date(ret2)
                            period = start_time + u"至"
                        else:
                            period = u"－－"
                    entity_data["period"] = period

                if company != "":
                    # Existing province/city values win over computed ones.
                    province, city = self.cal_province_city(entity_data)
                    entity_data["province"] = entity_data.get(
                        "province") if entity_data.get(
                            "province") else province
                    entity_data["city"] = entity_data.get(
                        "city") if entity_data.get("city") else city

                    # Companies rejected by filter_company are flagged.
                    if not self.filter_company(company):
                        entity_data["delete"] = 1

                    # ASCII parentheses become full-width ones.
                    entity_data['company'] = company.replace('(', '（').replace(
                        ')', '）')

            if entity_data.has_key("changerecords"):
                changerecords_list = []
                used_name_list = []
                for item in entity_data["changerecords"]:
                    change_item = item.get("change_item", "")
                    change_item = unicode(change_item)
                    # Name-change records contribute former company names.
                    if change_item in gsxx_conf.used_name_change_item_list:
                        after_name = unicode(item.get("after_content", ""))
                        befor_name = unicode(item.get("before_content", ""))

                        checked_after_name = self.check_name(
                            company, after_name)
                        checked_befor_name = self.check_name(
                            company, befor_name)

                        if checked_after_name:
                            used_name_list.append(checked_after_name)
                        if checked_befor_name:
                            used_name_list.append(checked_befor_name)

                    change_date = item.get("change_date", "")
                    if isinstance(change_date, basestring):
                        change_date = toolsutil.norm_date_time(
                            self.parser_tool.date_parser.get_date_list(
                                item.get("change_date", "")))
                    else:
                        # Non-string value: an all-digit value longer than 10
                        # characters has its last three digits dropped and is
                        # formatted as a local time (presumably a millisecond
                        # epoch timestamp; confirm against the data source).
                        change_date = str(change_date)
                        ret = toolsutil.re_find_one(u'\d+', change_date)
                        if len(change_date) > 10 and ret == change_date:
                            tmp = int(change_date[:-3])
                            data_value = time.strftime("%Y-%m-%d %H:%M:%S",
                                                       time.localtime(tmp))
                            change_date = data_value

                    item["change_date"] = change_date
                    changerecords_list.append(item)
                # De-duplicate the former names (ordering is not preserved).
                used_name_list = list(set(used_name_list))
                entity_data["used_name_list"] = used_name_list
                entity_data["changerecords"] = changerecords_list

            # Split every invested amount into a value and a unit.
            if entity_data.has_key("invested_companies"):
                invested_companies_list = []
                for single in entity_data["invested_companies"]:
                    invest_amount, invest_amount_unit = self.parser_tool.money_parser.transfer_money(
                        single.get("invest_amount", ""))
                    single["invest_amount"] = invest_amount
                    single["invest_amount_unit"] = invest_amount_unit
                    invested_companies_list.append(single)
                entity_data["invested_companies"] = invested_companies_list

            if entity_data.has_key("investor_change"):
                entity_data["investor_change"] = self.deal_time(
                    entity_data.get("investor_change", []), ["change_date"])

            # ASCII commas become full-width commas.
            if entity_data.has_key("business_status"):
                entity_data["business_status"] = entity_data.get(
                    "business_status", "").replace(',', '，')

            # A registered_code of 18+ characters is blanked and a missing one
            # is set to ""; blank strings are removed by the cleanup loop below.
            if entity_data.has_key("registered_code"):
                value = entity_data.get("registered_code", "")
                if len(value.strip()) >= 18:
                    entity_data["registered_code"] = ""
            else:
                entity_data["registered_code"] = ""

            # Predict the industry from the company name when it is missing.
            if not entity_data.get("industry"):
                entity_data[
                    "industry"] = self.parser_tool.industry_parser.predict(
                        company)

            # Remove None values and blank strings.
            # NOTE(review): this deletes while iterating items(); that is safe
            # on Python 2, where items() returns a list.
            for key, value in entity_data.items():
                if value is None or (isinstance(value, basestring)
                                     and value.strip() == ''):
                    del entity_data[key]

        return entity_data

Beispiel #11

0

Datei anzeigen

Datei: fygg_parser.py Projekt: mylove1/crawler-2

    def get_parser_data(self, content, bulletin_type):
        """Extract plaintiffs and defendants from a court bulletin.

        Args:
            content: Bulletin text; spaces are removed before parsing.
            bulletin_type: Bulletin type.  When it is listed in
                ``self.bulletin_type_list`` the defendant patterns are
                applied; otherwise the text before a colon/semicolon in the
                first segment that splits into exactly two parts is used as
                the defendant text.

        Returns:
            A dict with the keys "plaintiff_list", "defendant_list" and
            "bulletin_type".
        """
        plaintiff_list = []
        defendant_list = []
        norm_content = unicode(content).replace(" ", "")
        content_list = toolsutil.my_split(norm_content,
                                          ['，', ',', '。', '\r\n', '\t'])

        # 1. Plaintiffs: first segment/pattern combination that matches.
        for rowcontent in content_list:
            for plaintiff_regex in self.plaintiff_regex_list:
                hit = toolsutil.re_findone(plaintiff_regex,
                                           unicode(rowcontent))
                if hit:
                    plaintiff_list = toolsutil.my_split(
                        hit, self.litiants_seps)
                    break
            else:
                # No pattern matched this segment; try the next one.
                continue
            break

        # 2. Defendants
        if unicode(bulletin_type) in self.bulletin_type_list:
            for rowcontent in content_list:
                matched = False

                for defendant_regex in self.defendant_regex_list:
                    hit = toolsutil.re_findone(defendant_regex,
                                               unicode(rowcontent))
                    if not hit:
                        continue
                    if u'你' in unicode(hit):
                        # The match contains "你" (you): take the names from
                        # the first segment instead.
                        defendant_list = toolsutil.my_split(
                            content_list[0], self.litiants_seps)
                    else:
                        defendant_list = toolsutil.my_split(
                            hit, self.litiants_seps)
                    if plaintiff_list == []:
                        # Without plaintiffs the names found are treated as
                        # the plaintiffs.
                        plaintiff_list = defendant_list
                        defendant_list = []
                    matched = True
                    break

                # The configured string patterns are tried on the same
                # segment regardless of the outcome above.
                for defendant_pattern in fygg_conf.defendant_pattern_list:
                    hit = toolsutil.re_find_one(defendant_pattern,
                                                unicode(rowcontent))
                    if hit:
                        defendant_list = toolsutil.my_split(
                            hit, self.litiants_seps)
                        matched = True
                        break

                if matched:
                    break

            plaintiff_list, defendant_list = self.format_litigant(
                plaintiff_list, defendant_list,
                fygg_conf.litigant_replace_str_list)
        else:
            content_list = toolsutil.my_split(norm_content,
                                              ['。', '\r\n', '\t', '，'])
            for rowcontent in content_list:
                parts = re.split(':|：|;', rowcontent)
                if len(parts) != 2:
                    continue
                defendant_list = toolsutil.my_split(parts[0],
                                                    self.litiants_seps)
                keywords = (fygg_conf.defendant_keyword_list +
                            fygg_conf.plaintiff_keyword_list)
                plaintiff_list, defendant_list = self.format_litigant(
                    plaintiff_list, defendant_list, keywords)
                break

        return {
            "plaintiff_list": plaintiff_list,
            "defendant_list": defendant_list,
            "bulletin_type": bulletin_type,
        }