コード例 #1
0
    def parse_text(self,d,db,cursor):
        """Parse the hearing notice held in ``d`` and store each hearing found.

        Titles containing '排期开庭' are treated as schedule pages: every
        ``(date, detail)`` pair matched in the body is stored as its own row.
        Any other title is treated as one hearing described by the whole body.

        ``d`` is updated in place with ``body``, ``sorttime``, ``anyou``,
        ``caseNo``, ``courtNum`` and ``md5`` (plus ``plaintiff``/``pname``
        when one of the ``self.pname_p`` templates matches) and is then passed
        to ``ktgg.ins_mysql`` together with ``db`` and ``cursor``.

        Differences from the previous revision:

        * a body without a recognisable date yields ``sorttime == ''``
          instead of raising ``IndexError``;
        * on schedule pages the parties of one entry no longer leak into the
          next entry when no template matches it;
        * the bracket class and the comma clean-up listed the ASCII character
          twice (a no-op); the full-width form is now handled as well.
        """
        def fill_and_store(text):
            # Shared tail of both layouts: case number, courtroom and parties
            # are read from ``text``; the row is then fingerprinted and stored.
            case_no = re.findall(r'[\[【（(].*?号', text)
            d['caseNo'] = case_no[0] if case_no else ''

            court = re.findall(r'我院(.*?)开庭审理', text)
            d['courtNum'] = court[0].replace('公开', '') if court else ''

            # Each template in self.pname_p takes the cause of action via %s;
            # the first template that matches wins.
            for template in self.pname_p:
                found = re.findall(template % d['anyou'], text)
                if found:
                    d['plaintiff'] = (found[0][0].replace('原告人', '')
                                      .replace('原告', '')
                                      .replace('，', '')
                                      .replace(',', ''))
                    d['pname'] = found[0][1].replace('被告人', '').replace('被告', '')
                    break

            d['md5'] = ktgg.get_md5(d['body'], d['url'])
            ktgg.ins_mysql(d, 'ktgg_kt_wuhan', db, cursor)

        if '排期开庭' in d['title']:
            # The body is scanned once, up front, so overwriting d['body']
            # inside the loop does not affect the remaining entries.
            schedule = re.findall(
                r'(\d{1,4}[年].*?[日上下号])(.*?)\d{1,2}、{1,2}', d['body'])
            for when, detail in schedule:
                d['body'] = when + detail
                d['sorttime'] = when
                d['anyou'] = ktgg.set_anyou(detail)
                # ``d`` is reused for every entry; drop the previous parties.
                d.pop('plaintiff', None)
                d.pop('pname', None)
                fill_and_store(detail)
        else:
            anyou = ktgg.set_anyou()
            d['anyou'] = ktgg.search_anyou(anyou, d['body'])
            dates = re.findall(r'\d{1,4}[年月].*?[日上下号]', d['body'])
            d['sorttime'] = dates[0] if dates else ''
            fill_and_store(d['body'])
コード例 #2
0
    def parse_html(self, links):
        """Fetch every notice page in ``links`` and pass it to ``parse_text``.

        ``links`` holds paths that are appended to the host part of
        ``self.url``.  For each page a record dict is built (post time,
        court, source, url, title, province, body, md5) and handed to
        ``self.parse_text`` together with the open connection.  Pages for
        which ``ktgg.request_dis`` returns an empty text are skipped.

        The connection returned by ``ktgg.con_mysql`` is now closed in a
        ``finally`` clause, so it is released even when a page raises.
        """
        # Connect to the database
        db, cursor = ktgg.con_mysql()
        try:
            for link in links:
                d = {}
                url = 'http://' + re.findall('//(.*?)/', self.url)[0] + link
                text, html = ktgg.request_dis(url)
                if text == '':
                    continue

                # Page-level metadata
                d['posttime'] = re.findall('发布时间(.*?)<',
                                           html)[0].replace(':', '').strip()
                d['court'] = '郴州市苏仙区人民法院'
                d['source'] = self.url
                d['url'] = url
                d['title'] = re.findall("'b_title'>(.*?)<", html)[0]
                d['province'] = '湖南省'
                # Strip the unwanted fragments listed in self.tihuan
                for junk in self.tihuan:
                    text = text.replace(junk, '')
                d['body'] = text
                # Nothing left after cleaning: fall back to the title
                if text == '':
                    d['body'] = d['title']
                d['md5'] = ktgg.get_md5(d['body'], d['url'])
                self.parse_text(d, db, cursor)
        finally:
            # Close the database
            ktgg.clo_mysql(db, cursor)
コード例 #3
0
ファイル: leiyang.py プロジェクト: tianming1903/ktgg_hunan
    def parse_text(self, text, d, db, cursor):
        """Fill ``d`` from one notice and insert it into ``ktgg_kt_wuhan``.

        ``text`` (with all whitespace removed) becomes ``d['body']`` and is
        searched for courtroom, hearing date and judge.  Defendant and cause
        of action are taken from the part of ``d['title']`` after '被告'.

        When ``ktgg.search_anyou`` reports ``start == 0`` for the title the
        method returns without inserting anything (presumably "no usable
        cause of action" - confirm against ``ktgg``).

        The cause of action is literal text cut from the title, so it is
        escaped before being placed in a regular expression.
        """
        d['body'] = re.sub(r'\s', '', text)
        # Courtroom
        court = re.findall('法院(.*?庭)', d['body'])
        if court:
            d['courtNum'] = court[0]
        # Hearing date
        dates = re.findall(r'\d{1,4}[年月].*?[日号]', d['body'])
        if dates:
            d['sorttime'] = dates[0]
        # Judge
        judge = re.findall('审判员(.*?)[书代]', d['body'])
        if judge:
            d['judge'] = judge[0].replace(':', '')

        # Defendant and cause of action (taken from the title)
        party = re.findall('被告(.*)', d['title'])
        if party:
            party = party[0]
            anyou = ktgg.set_anyou()
            start, end = ktgg.search_anyou(anyou, party)
            if start == 0:
                return
            d['anyou'] = party[start:end]
            # Everything before the cause of action is the defendant
            before_cause = re.findall(
                '(.*?)%s' % re.escape(d['anyou']), party)
            d['pname'] = before_cause[0].replace('人', '')
        d['md5'] = ktgg.get_md5(d['body'], d['url'])
        ktgg.ins_mysql(d, 'ktgg_kt_wuhan', db, cursor)
コード例 #4
0
    def parse_text(self, text, d, db, cursor):
        """Split ``text`` into hearings and insert one row per hearing.

        When ``d['title']`` contains a 2-4 digit number the text is treated
        as a numbered list and split on markers such as ``1、`` / ``2.``;
        otherwise the whole text is one hearing.  Each hearing is split on
        whitespace into labelled fields ('案由', '时间', '案号', '地点',
        '被告', plus the labels in ``self.judge`` and ``self.plaintiff``).
        A title of the form "A诉B…" overrides plaintiff and defendant.

        Every row starts as a copy of ``d``; ``d`` itself is not modified.
        All values of the row are cleaned with ``self.tihuan`` before insert.

        Differences from the previous revision:

        * chunks with no fields at all are skipped (only one-field chunks
          were skipped before, so empty chunks produced empty rows);
        * a "诉" title without a parsed cause of action no longer raises
          ``KeyError``;
        * the cause of action is escaped before use in a regex.
        """
        # Two layouts: numbered list of hearings, or a single hearing
        if re.search(r'\d{2,4}', d['title']):
            infos = re.split(r'\d{1,2}[、.]', text)
        else:
            infos = [text]

        # Walk through each hearing
        for info in infos:
            d_info = d.copy()
            fields = [piece for piece in re.split(r'\s', info) if piece]
            if len(fields) <= 1:
                continue
            d_info['body'] = ''.join(fields).replace('\xa0', '').replace(
                '\u3000', '').replace('\n', '').replace('\r', '')

            for field in fields:
                if '案由' in field:
                    d_info['anyou'] = field.split('案由')[1].replace(':', '')
                if '时间' in field:
                    sorttime = re.findall(r'\d{4}年.*?日', field)
                    if sorttime:
                        d_info['sorttime'] = sorttime[0]
                if '案号' in field:
                    d_info['caseNo'] = field.split('案号')[1]
                if '地点' in field:
                    d_info['courtNum'] = field.split('地点')[1]
                if '被告' in field:
                    d_info['pname'] = field.split('被告')[1]
                # First matching label wins for judge and plaintiff
                for judge in self.judge:
                    if judge in field:
                        d_info['judge'] = field.split(judge)[1]
                        break
                for plaintiff in self.plaintiff:
                    if plaintiff in field:
                        d_info['plaintiff'] = field.split(plaintiff)[1]
                        break

            if '诉' in d_info['title']:
                sides = re.findall('(.*?)诉(.*)', d_info['title'])[0]
                d_info['plaintiff'] = sides[0]
                # The defendant is whatever precedes the cause of action
                if 'anyou' in d_info:
                    pname = re.findall(
                        '(.*?)%s' % re.escape(d_info['anyou']), sides[1])
                    if pname:
                        d_info['pname'] = pname[0]

            # Character clean-up (keys are unchanged, only values replaced)
            for key, value in d_info.items():
                for x in self.tihuan:
                    value = value.replace(x, '')
                d_info[key] = value
            d_info['md5'] = ktgg.get_md5(d_info['body'], d['url'])

            ktgg.ins_mysql(d_info, 'ktgg_kt_wuhan', db, cursor)
コード例 #5
0
ファイル: yanling.py プロジェクト: tianming1903/ktgg_hunan
 def parse_text(self, text, d, db, cursor):
     """Insert one row per non-empty line of ``text``.

     Each line becomes the ``body`` of a copy of ``d``.  Hearing date and
     courtroom are read from the line; plaintiff, cause of action and
     defendant come from the first pattern in ``self.party`` that both
     matches the line and yields a defendant.  ``d`` is not modified.

     Compared with the previous revision the md5 is computed once per row
     (it used to be computed twice with identical arguments), the loop
     variable is no longer overwritten by its own match result, and the
     cause of action is escaped before being used in a regex.
     """
     # Split the text into lines
     for line in re.split('\n', text):
         if not line:
             continue
         d_info = d.copy()
         d_info['body'] = line.replace('\xa0', '').replace('\r', '')
         # Hearing date
         sorttime = re.findall(r'\d{1,4}[年月].*?[日号]', line)
         if sorttime:
             d_info['sorttime'] = sorttime[0]
         # Courtroom
         courtNum = re.findall('在(.*?庭)', line)
         if courtNum:
             d_info['courtNum'] = courtNum[0]
         # Cause of action, plaintiff, defendant
         for pattern in self.party:
             matches = re.findall(pattern, line)
             if not matches:
                 continue
             party = matches[0]
             d_info['plaintiff'] = party[0].replace('原告',
                                                    '').replace('人', '')
             anyou = ktgg.set_anyou()
             start, end = ktgg.search_anyou(anyou, party[1])
             d_info['anyou'] = party[1][start:end]
             pname = re.findall('(.*?)%s' % re.escape(d_info['anyou']),
                                party[1])
             if pname:
                 d_info['pname'] = pname[0].replace('被告',
                                                    '').replace('人', '')
                 break
         d_info['md5'] = ktgg.get_md5(d_info['body'], d_info['url'])
         ktgg.ins_mysql(d_info, 'ktgg_kt_wuhan', db, cursor)
コード例 #6
0
ファイル: zixing.py プロジェクト: tianming1903/ktgg_hunan
 def parse_text(self, text, d, db, cursor):
     """Insert one row per table row of the notice.

     ``text`` must offer ``xpath``; every ``tr`` below
     ``span.detail_content`` except the first is read as one hearing.
     Cells 2, 5, 6 and 7 supply case number, judge, courtroom and date;
     cell 3 is matched against the patterns in ``self.party`` to obtain
     cause of action, defendant and (for two-group patterns) plaintiff.
     Each row starts as a copy of ``d``.

     A row whose party text has no usable cause of action
     (``start == 0`` from ``ktgg.search_anyou``) is now skipped on its own;
     previously the method returned and silently dropped every remaining
     row of the table.  The cause of action is escaped before being used
     in a regex.
     """
     rows = text.xpath('//span[@class="detail_content"]//tr')[1:]
     for row in rows:
         d_info = d.copy()
         # Body: full text of the row without line breaks
         d_info['body'] = row.xpath('string(.)').replace('\r', '').replace(
             '\n', '')
         # Case number
         d_info['caseNo'] = row.xpath('./td[2]/span/text()')[0]
         # Judge
         d_info['judge'] = row.xpath('./td[5]/span/text()')[0]
         # Courtroom
         d_info['courtNum'] = row.xpath('./td[6]/span/text()')[0]
         # Date (the part before the first space)
         d_info['sorttime'] = row.xpath('./td[7]/span/text()')[0].split(
             ' ')[0]
         # Plaintiff, defendant and cause of action
         party_text = row.xpath('./td[3]/span/text()')[0]
         skip_row = False
         for pattern in self.party:
             found = re.findall(pattern, party_text)
             if not found:
                 continue
             party = found[0]
             anyou = ktgg.set_anyou()
             if isinstance(party, str):
                 # One-group pattern: defendant + cause of action only
                 start, end = ktgg.search_anyou(anyou, party)
                 if start == 0:
                     skip_row = True
                     break
                 d_info['anyou'] = party[start:end]
                 d_info['pname'] = re.findall(
                     '(.*?)%s' % re.escape(d_info['anyou']),
                     party)[0].replace('被告人', '').replace('被告', '')
             elif isinstance(party, tuple):
                 # Two-group pattern: (plaintiff, defendant + cause)
                 start, end = ktgg.search_anyou(anyou, party[1])
                 if start == 0:
                     skip_row = True
                     break
                 d_info['anyou'] = party[1][start:end]
                 d_info['plaintiff'] = party[0].replace('原告人',
                                                        '').replace('原告', '')
                 d_info['pname'] = re.findall(
                     '(.*?)%s' % re.escape(d_info['anyou']),
                     party[1])[0].replace('被告人', '').replace('被告', '')
             break
         if skip_row:
             continue
         d_info['md5'] = ktgg.get_md5(d_info['body'], d_info['url'])
         ktgg.ins_mysql(d_info, 'ktgg_kt_wuhan', db, cursor)
コード例 #7
0
    def parse_text(self, text, d, db, cursor):
        """Fill ``d`` from a single-hearing notice and insert it.

        ``text`` is cleaned with ``self.tihuan`` and stored as ``d['body']``.
        Hearing date and courtroom are read from the text; the first pattern
        in ``self.party`` that matches supplies cause of action, defendant
        and (for two-group patterns) plaintiff.

        For a one-group match with ``start == 0`` the text is logged through
        ``ktgg.write_txt('anyou', text)`` and processing continues; for a
        two-group match with ``start == 0`` the method returns without
        inserting.  Both behaviours are kept from the previous revision.

        The cause of action is literal text cut from the match, so it is
        escaped before being placed in a regular expression.
        """
        for junk in self.tihuan:
            text = text.replace(junk, '')
        d['body'] = text

        # Hearing date
        sorttime = re.findall(r'\d{1,4}[年月].*?[日号]', text)
        if sorttime:
            d['sorttime'] = sorttime[0]
        # Courtroom: "在…庭" first, "第…庭" as fallback
        courtNum = (re.findall('在(.{2,7}庭)', text)
                    or re.findall('第.{1,4}庭', text))
        if courtNum:
            d['courtNum'] = courtNum[0]

        # Plaintiff, defendant and cause of action
        for pattern in self.party:
            found = re.findall(pattern, d['body'])
            if not found:
                continue
            party = found[0]
            anyou = ktgg.set_anyou()
            if isinstance(party, str):
                start, end = ktgg.search_anyou(anyou, party)
                if start == 0:
                    ktgg.write_txt('anyou', text)
                d['anyou'] = party[start:end]
                d['pname'] = re.findall(
                    '(.*?)%s' % re.escape(d['anyou']),
                    party)[0].replace('人', '')
            elif isinstance(party, tuple):
                start, end = ktgg.search_anyou(anyou, party[1])
                if start == 0:
                    return
                d['anyou'] = party[1][start:end]
                d['plaintiff'] = party[0].replace('人', '')
                d['pname'] = re.findall(
                    '(.*?)%s' % re.escape(d['anyou']),
                    party[1])[0].replace('人', '')
            break
        d['md5'] = ktgg.get_md5(d['body'], d['url'])
        ktgg.ins_mysql(d, 'ktgg_kt_wuhan', db, cursor)
コード例 #8
0
ファイル: yongxing.py プロジェクト: tianming1903/ktgg_hunan
 def parse_text(self, d, db, cursor):
     """Fill ``d`` from its own title and insert it.

     The title doubles as the body.  Hearing date and courtroom are read
     from the title; the first pattern in ``self.party`` that matches
     supplies cause of action, defendant and (for two-group patterns)
     plaintiff.  When ``ktgg.search_anyou`` reports ``start == 0`` the
     method returns without inserting (and without sleeping).  After a
     successful insert the method sleeps for half a second.

     The cause of action is literal text cut from the title, so it is
     escaped before being placed in a regular expression.
     """
     # The title is the only text available
     d['body'] = d['title']
     # Hearing date
     sorttime = re.findall(r'\d{1,4}[年月].*?[日号]', d['title'])
     if sorttime:
         d['sorttime'] = sorttime[0]
     # Courtroom
     courtNum = re.findall('第.{1,4}庭', d['title'])
     if courtNum:
         d['courtNum'] = courtNum[0]
     # Cause of action, defendant and plaintiff
     for pattern in self.party:
         found = re.findall(pattern, d['title'])
         if not found:
             continue
         party = found[0]
         anyou = ktgg.set_anyou()
         if isinstance(party, str):
             start, end = ktgg.search_anyou(anyou, party)
             if start == 0:
                 return
             d['anyou'] = party[start:end]
             d['pname'] = re.findall(
                 '(.*?)%s' % re.escape(d['anyou']),
                 party)[0].replace('被告人', '').replace('被告', '')
         elif isinstance(party, tuple):
             start, end = ktgg.search_anyou(anyou, party[1])
             if start == 0:
                 return
             d['anyou'] = party[1][start:end]
             d['plaintiff'] = party[0].replace('原告人',
                                               '').replace('原告', '')
             d['pname'] = re.findall(
                 '(.*?)%s' % re.escape(d['anyou']),
                 party[1])[0].replace('被告人', '').replace('被告', '')
         break
     d['md5'] = ktgg.get_md5(d['body'], d['url'])
     ktgg.ins_mysql(d, 'ktgg_kt_wuhan', db, cursor)
     time.sleep(0.5)
コード例 #9
0
    def parse_text(self, text, d, db, cursor):
        """Fill ``d`` from whitespace-separated labelled fields and insert it.

        ``text`` is cleaned with ``self.tihuan`` and split on whitespace.
        Nothing is stored unless there are at least three tokens and the
        title does not contain '--'.  Each token is checked against the
        labels below, in order; the first label found decides which key of
        ``d`` receives the text that follows the label.

        Differences from the previous revision:

        * a title without a date no longer raises ``IndexError``; the row is
          inserted without ``sorttime``;
        * the colon clean-up listed the ASCII colon twice (a no-op); the
          full-width colon is now stripped as well.
        """
        # Remove unwanted fragments, then tokenise
        for junk in self.tihuan:
            text = text.replace(junk, '')
        tokens = re.split(r'\s', text)

        if len(tokens) < 3 or '--' in d['title']:
            return

        d['body'] = text.replace('\n', '')
        d['md5'] = ktgg.get_md5(d['body'], d['url'])
        dates = re.findall(r'\d{1,4}[年月].*?[日号]', d['title'])
        if dates:
            d['sorttime'] = dates[0]

        # (label, target key, also strip '人' from the value)
        labels = (
            ('原告', 'plaintiff', True),
            ('被告', 'pname', True),
            ('案由', 'anyou', False),
            ('主审法官', 'judge', False),
            ('审判员', 'judge', False),
            ('主审人', 'judge', False),
            ('案号', 'caseNo', False),
        )
        for token in tokens:
            for label, key, strip_ren in labels:
                if label not in token:
                    continue
                # Tokens hold no whitespace, so "rest of the token" is
                # everything after the first occurrence of the label.
                value = token.split(label, 1)[1]
                if strip_ren:
                    value = value.replace('人', '')
                d[key] = value.replace('：', '').replace(':', '')
                break
        ktgg.ins_mysql(d, 'ktgg_kt_wuhan', db, cursor)
コード例 #10
0
ファイル: xinning.py プロジェクト: tianming1903/ktgg_hunan
    def parse_text(self, text, d, db, cursor):
        """Insert hearings found in ``text``, which comes in two line layouts.

        * Layout 1: a line with six or more whitespace-separated tokens is a
          complete hearing.  The first token containing '号' is the case
          number and the token after it names the parties; the first token
          holding a ``YYYY-MM-DD`` date gives the hearing date and the token
          before it the courtroom.
        * Layout 2: shorter non-empty lines are collected in pairs; the two
          lines together form one hearing parsed with regular expressions.

        A hearing is inserted only when ``ktgg.search_anyou`` returned a
        non-zero ``start`` for it.  Each row starts as a copy of ``d``.

        Differences from the previous revision:

        * ``pname`` (both layouts) and ``plaintiff`` (layout 2, text after a
          case number) used to be stored as lists; the matched string is
          stored now, and ``pname`` is left unset when nothing matches;
        * a case number in the last token no longer raises ``IndexError``
          (the line is skipped), and a date in the first token no longer
          takes the last token as courtroom;
        * an unreachable ``else`` that referenced ``party`` was removed;
        * the cause of action is escaped before use in a regex, and the
          case-number class (which listed '(' twice) also accepts the
          full-width bracket.
        """
        # Split the text into lines
        infos = re.split('\n', text)
        f = []  # short lines waiting to be paired (layout 2)
        for info in infos:
            d_info = d.copy()
            info = re.split(r'\s', info)
            info = list(filter(None, info))
            start = 0
            # Layout 1
            if len(info) >= 6:
                d_info['sorttime'] = ''
                d_info['caseNo'] = ''
                d_info['body'] = ''.join(info)
                for index, i in enumerate(info):
                    # Case number, cause of action, defendant and plaintiff
                    if ('号' in i) and (d_info['caseNo'] == ''):
                        d_info['caseNo'] = i
                        # The parties are expected in the next token
                        if index + 1 >= len(info):
                            break
                        party = info[index + 1]
                        # Cause of action
                        anyou = ktgg.set_anyou()
                        start, end = ktgg.search_anyou(anyou, party)
                        if start == 0:
                            break
                        d_info['anyou'] = party[start:end]
                        before_cause = '(.*?)%s' % re.escape(d_info['anyou'])
                        # Plaintiff and defendant
                        if '诉' in party:
                            p = re.split('诉', party)
                            d_info['plaintiff'] = p[0]
                            pname = re.findall(before_cause, p[1])
                            if pname:
                                d_info['pname'] = pname[0]
                        else:
                            d_info['pname'] = re.findall(
                                before_cause, party)[0]
                    # Hearing date and courtroom
                    if d_info['sorttime'] == '':
                        sorttime = re.findall(r'\d{4}-\d{2}-\d{2}', i)
                        if sorttime:
                            d_info['sorttime'] = sorttime[0]
                            if index > 0:
                                d_info['courtNum'] = info[index - 1]
                if start == 0:
                    continue
                d_info['md5'] = ktgg.get_md5(d_info['body'], d_info['url'])
                ktgg.ins_mysql(d_info, 'ktgg_kt_wuhan', db, cursor)

            # Layout 2
            elif 0 < len(info):
                f.append(info)
                if len(f) == 2:
                    info = f[0] + f[1]
                    d_info['body'] = ''.join(info)
                    # Hearing date
                    sorttime = re.findall(r'\d{4}年.*?日', d_info['body'])
                    if sorttime:
                        d_info['sorttime'] = sorttime[0]
                    # Courtroom
                    courtNum = re.findall('第.{2,6}庭|回龙法庭', d_info['body'])
                    if courtNum:
                        d_info['courtNum'] = courtNum[0]
                    # Case number
                    caseNo = re.findall('[（(民].*?号', d_info['body'])
                    if caseNo:
                        d_info['caseNo'] = caseNo[0]

                    for i in info:
                        if '诉' in i:
                            # Cause of action
                            anyou = ktgg.set_anyou()
                            start, end = ktgg.search_anyou(anyou, i)
                            if start == 0:
                                break
                            d_info['anyou'] = i[start:end]
                            # Plaintiff and defendant
                            p = re.split('诉', i)
                            if '号' in p[0]:
                                # Drop a leading case number
                                d_info['plaintiff'] = p[0].split('号')[-1]
                            else:
                                d_info['plaintiff'] = p[0]
                            pname = re.findall(
                                '(.*?)%s' % re.escape(d_info['anyou']), p[1])
                            if pname:
                                d_info['pname'] = pname[0]

                    f = []
                if start == 0:
                    continue
                d_info['md5'] = ktgg.get_md5(d_info['body'], d_info['url'])
                ktgg.ins_mysql(d_info, 'ktgg_kt_wuhan', db, cursor)
コード例 #11
0
ファイル: beihu.py プロジェクト: tianming1903/ktgg_hunan
    def parse_text(self,text,html,d,db,cursor):
        """Store the hearing(s) described by one notice page.

        * Titles containing '开庭公告': ``html`` must offer ``xpath``; every
          ``//tbody/tr`` except the first is one hearing.  Rows with four
          cells carry party/courtroom/date in cells 2-4, other rows in
          cells 1-3.  Each row is stored as a copy of ``d``.
        * Other titles: ``text`` is the body of a single hearing; the date
          is the "M月D日" found in the title prefixed with the year taken
          from ``d['posttime']``.  ``d`` is filled in place and stored.

        Parties are recognised with the patterns in ``self.party``.  In the
        single-hearing branch the method returns without inserting when no
        pattern matches or ``ktgg.search_anyou`` reports ``start == 0``.

        In the table branch those two conditions now skip only the affected
        row; previously the method returned and silently dropped every
        remaining row.  The cause of action is escaped before being used in
        a regex.
        """
        if '开庭公告' in d['title']:
            for row in html.xpath('//tbody/tr')[1:]:
                d_info = d.copy()
                d_info['body'] = row.xpath('string(.)').replace('\r','').replace('\n','')
                # Four-cell rows have one extra leading cell
                first = 2 if len(row.xpath('./td')) == 4 else 1
                # Date (the part before the first space)
                d_info['sorttime'] = row.xpath('./td[%d]' % (first + 2))[0].xpath('string(.)').split(' ')[0]
                # Courtroom
                d_info['courtNum'] = row.xpath('./td[%d]' % (first + 1))[0].xpath('string(.)')
                party_text = row.xpath('./td[%d]' % first)[0].xpath('string(.)')

                # Defendant, plaintiff and cause of action
                parsed = False
                for pattern in self.party:
                    found = re.findall(pattern, party_text)
                    if not found:
                        continue
                    party = found[0]
                    anyou = ktgg.set_anyou()
                    if isinstance(party, str):
                        start,end = ktgg.search_anyou(anyou,party)
                        if start == 0:
                            break
                        d_info['anyou'] = party[start:end]
                        d_info['pname'] = re.findall('(.*?)%s' % re.escape(d_info['anyou']),party)[0].replace('被告人','').replace('被告','')
                    elif isinstance(party, tuple):
                        start,end = ktgg.search_anyou(anyou,party[1])
                        if start == 0:
                            break
                        d_info['anyou'] = party[1][start:end]
                        d_info['plaintiff'] = party[0].replace('原告人','').replace('原告','')
                        d_info['pname'] = re.findall('(.*?)%s' % re.escape(d_info['anyou']),party[1])[0].replace('被告人','').replace('被告','')
                    parsed = True
                    break
                if not parsed:
                    continue
                d_info['md5'] = ktgg.get_md5(d_info['body'],d_info['url'])
                ktgg.ins_mysql(d_info,'ktgg_kt_wuhan',db,cursor)

        else:
            d['body'] = text
            # Date: month/day from the title, year from the post time
            sorttime = re.findall(r'\d{1,2}月.*?日',d['title'])
            if sorttime:
                d['sorttime'] = d['posttime'].split('-')[0] + '年' + sorttime[0]
            # Courtroom
            courtNum = re.findall('在(.{2,5}庭)',d['body'])
            if courtNum:
                d['courtNum'] = courtNum[0]
            for pattern in self.party:
                found = re.findall(pattern, d['body'])
                if not found:
                    continue
                party = found[0]
                anyou = ktgg.set_anyou()
                if isinstance(party, str):
                    start,end = ktgg.search_anyou(anyou,party)
                    if start == 0:
                        return
                    d['anyou'] = party[start:end]
                    d['pname'] = re.findall('(.*?)%s' % re.escape(d['anyou']),party)[0].replace('被告人','').replace('被告','').replace('审','').replace('理','')
                elif isinstance(party, tuple):
                    start,end = ktgg.search_anyou(anyou,party[1])
                    if start == 0:
                        return
                    d['anyou'] = party[1][start:end]
                    d['plaintiff'] = party[0].replace('原告人','').replace('原告','').replace('审','').replace('理','')
                    d['pname'] = re.findall('(.*?)%s' % re.escape(d['anyou']),party[1])[0].replace('被告人','').replace('被告','')
                break
            else:
                # No pattern matched: nothing to store
                return
            d['md5'] = ktgg.get_md5(d['body'],d['url'])
            ktgg.ins_mysql(d,'ktgg_kt_wuhan',db,cursor)
コード例 #12
0
ファイル: ziyang.py プロジェクト: tianming1903/ktgg_hunan
    def parse_text(self, text, d, db, cursor):
        """Insert one row for every sufficiently long entry of ``text``.

        ``text`` is a sequence of strings.  When the first entry ends with
        '审理' it is joined with the second one and only that joined entry is
        processed (as before).  Entries of 30 characters or fewer are
        ignored.  For each remaining entry ``d`` is reset and refilled with
        body, hearing date, courtrooms, causes of action, defendants and
        plaintiff, then stored.  Entries without a '被告…' clause are not
        stored.

        Differences from the previous revision:

        * an empty ``text`` or a single entry ending in '审理' no longer
          raises ``IndexError``;
        * the leftover debug ``print`` of the defendant was removed;
        * the cause of action is escaped before use in a regex and a clause
          whose defendant cannot be isolated is skipped instead of raising.
        """
        if len(text) >= 2 and text[0][-2:] == '审理':
            text = [text[0] + text[1]]
        for info in text:
            if len(info) <= 30:
                continue
            d['body'] = info
            d['sorttime'] = ''
            d['courtNum'] = ''
            d['anyou'] = ''
            d['pname'] = ''
            d['plaintiff'] = ''

            # Hearing date
            sorttime = re.findall(r'\d{2,4}[年月].*?[日号]', info)
            if sorttime:
                d['sorttime'] = sorttime[0]

            # Courtroom(s)
            courtNum = re.findall('第.*?庭', info)
            if courtNum:
                d['courtNum'] = ','.join(courtNum)

            # Clauses holding a defendant and a cause of action
            p = re.findall('被告.*?[罪案,。]', info)
            if not p:
                continue

            # Cause of action: the longest known one found in each clause,
            # or '' when the clause contains none
            anyou = ktgg.set_anyou()
            causes = []
            for clause in p:
                hits = [ay for ay in anyou if ay in clause]
                causes.append(max(hits, key=len) if hits else '')
            d['anyou'] = ','.join(causes)

            # Defendant: text between '被告' and the cause of action
            pnames = []
            for clause, cause in zip(p, causes):
                if cause == '':
                    continue
                found = re.findall('被告(.*?)%s' % re.escape(cause), clause)
                if not found:
                    continue
                pname = found[0]
                for i in self.p:
                    pname = pname.replace(i, '')
                pnames.append(pname)
            d['pname'] = ','.join(pnames)

            # Plaintiff
            plaintiff = re.findall('原告(.*?)[诉与]', info)
            if plaintiff:
                d['plaintiff'] = plaintiff[0]

            d['md5'] = ktgg.get_md5(info, d['url'])
            ktgg.ins_mysql(d, 'ktgg_kt_wuhan', db, cursor)
コード例 #13
0
    def parse_text(self, text, d, db, cursor):
        """Insert one row per line of ``text`` that names a cause of action.

        Empty lines and lines containing '案由' are ignored.  Every other
        line is split on whitespace; the first token containing a cause of
        action known to ``ktgg.set_anyou()`` becomes ``d['anyou']`` (lines
        without one are skipped).  Case number, courtroom, hearing date and
        the parties are then derived from the tokens and ``d`` is stored.
        ``d`` is reused and overwritten for every line.

        The hearing date is the last token containing one of the markers in
        ``self.t``, reduced by the first matching pattern in ``self.re``;
        ``a.b``/``a.b.c`` and ``a/b``/``a/b/c`` forms are rewritten with
        月/日 or 年/月/日.  The parties are read from the first non-empty
        token after the cause of action, split on a semicolon or '诉';
        otherwise the token is stored as ``d['party']``.

        Differences from the previous revision:

        * the '/' date branch now splits the extracted date (it split the
          raw token) and accepts a two-part date instead of raising;
        * a cause of action in the last token no longer raises
          ``IndexError``; the parties are left empty;
        * the second ``';'`` branch was unreachable; the full-width
          semicolon is handled in its place.
        """
        # One case per line
        infos = re.split('\n', text)

        for info in infos:
            if '案由' in info or not info:
                continue

            # Tokens of the line (taken before the clean-up below)
            l = re.split(r'\s', info)

            # Body
            for i in self.tihuan:
                info = info.replace(i, '')
            d['body'] = info

            # Case number
            caseNo = re.findall('[民(].*?号', info)
            d['caseNo'] = caseNo[0] if caseNo else ''

            # Cause of action: first token containing a known one
            anyou = ktgg.set_anyou()
            for token in l:
                if any(a in token for a in anyou):
                    d['anyou'] = token
                    break
            else:
                continue

            # Courtroom
            d['courtNum'] = ''
            for token in l:
                courtNum = re.findall('第.*?庭', token)
                if courtNum:
                    d['courtNum'] = courtNum[0]
                    break

            # Hearing date: search the tokens from the end of the line
            sorttime = ''
            for token in reversed(l):
                for x in self.t:
                    if x in token:
                        sorttime = token
                        break
                if sorttime:
                    break
            # Reduce the token to the date itself
            for pattern in self.re:
                s = re.findall(pattern, sorttime)
                if s:
                    d['sorttime'] = s[0]
                    break
            else:
                d['sorttime'] = ''

            # Rewrite dotted / slashed dates with Chinese unit characters
            for sep in ('.', '/'):
                if sep in d['sorttime']:
                    times = d['sorttime'].split(sep)
                    if len(times) == 2:
                        d['sorttime'] = times[0] + '月' + times[1] + '日'
                    else:
                        d['sorttime'] = (times[0] + '年' + times[1] + '月'
                                         + times[2] + '日')
                    break

            # Parties: first non-empty token after the cause of action
            after_cause = l.index(d['anyou']) + 1
            yuan_bei = next(
                (token for token in l[after_cause:] if token != ''), '')

            d['party'] = ''
            d['plaintiff'] = ''
            d['pname'] = ''
            for sep in ('；', ';', '诉'):
                if sep in yuan_bei:
                    sides = yuan_bei.split(sep)
                    d['plaintiff'] = sides[0].replace(
                        '原告', '').replace(' ', '').replace(':', '')
                    d['pname'] = sides[1].replace('被告', '').replace(
                        ' ', '').replace(':', '')
                    break
            else:
                d['party'] = yuan_bei
            d['md5'] = ktgg.get_md5(info, d['url'])
            ktgg.ins_mysql(d, 'ktgg_kt_wuhan', db, cursor)