mb = MysqlBase(connecter) def extract_business_entity(s): if '法定代表人' in s: l = s.rfind('(') r = s.rfind(')') if l != -1 and r != -1: t = s.rfind(':') name = s[:l] business_entity = s[t+1:r] return name, business_entity else: return s, '' for items in mb._execute("SELECT * FROM judge_doc_shgy.pinggupaimai limit 1000; "): try: obj = {} d = pq(items['content']) key_msg = [each.text() for n, each in enumerate(d(".tdnr").items()) if n % 2 == 0] value_msg = [each.text() for n, each in enumerate(d(".tdnr").items()) if n % 2 != 0] msg = dict(zip(key_msg, value_msg)) limiting_cause = [d.text() for d in d('.nr div.nr').items() if len(d.text()) > 10][0] court_name, tel_of_court = msg['承办法院、联系电话'].split('\xa0\xa0') auction_house, tel_of_auction = msg['拍卖机构'].split(':') name, business_entity = extract_business_entity(msg['被执行人']) obj = { 'exposure_type': '执行案件评估拍卖', 'identity': '被执行人', 'name': name, 'business_entity': business_entity,
if is_exists(url='http://10.1.1.28:9200/judge_doc/total_doc', field='case_no', value=obj['case_no']): ines(id=obj['instrument_id'], path='http://10.1.1.28:9200/judge_doc/total_doc', data=obj) if __name__ == '__main__': id = 1 l_id = 1 r_id = 1 mb = MysqlBase(connecter) for i in range(1): for items in mb._execute( "select * from `judge_doc_new` where id = '85737' ".format( (i + 3) * 1000)): print('process id:{}'.format(items['id'])) print(items) # obj = {} # id = id + 1 # table_id = items['id'] # shls = shanghai_list(items) # trial_type = shanghai_trial_type(items) # court_name = shanghai_court_name(items) # # content = shanghai_content(items) # article = shanghai_aricle(items) # # ws = WenshuBase(article) # litigants, agents = litigants_agent_extract('\n'.join(ws.role_paragraph)) # court_officers = court_extract('\n'.join(ws.court_paragraph))
# ws = WenshuBase(article) # # # persons = person_extract('\n'.join(ws.role_paragraph)) # for p in local_person(persons): # pass # # print(p) # # # litigants, agents = litigants_agent_extract('\n'.join(ws.role_paragraph)) # for litigant in litigants: # print(json.dumps(litigants)) # # print(agents) # # print('*'*100) # # courts = court_extract('\n'.join(ws.court_paragraph)) # for c in courts: # # print(c) # pass connecter = { 'host': '10.1.1.25', 'user': '******', 'password': '******', 'db': 'judge_doc_shgy' } mb = MysqlBase(connecter) for items in mb._execute("select * from judge_doc limit 10"): a = shanghai_aricle(items) litigants_agent_extract(a)
def chongqing_article(items):
    """Return the text of every ``<span>`` in the row's HTML, split on spaces.

    ``items`` is a mapping with a ``'content'`` key holding the markup.
    The markup is cleaned with two ``re.sub`` passes and parsed with ``pq``
    (presumably pyquery's ``PyQuery`` -- confirm against the file's imports).
    """
    # Both substitutions are reproduced exactly as found in the original.
    markup = re.sub(' ', '', items['content'])
    markup = re.sub(' ', '', markup)
    return pq(markup)('span').text().split(' ')


def chongqing_list(items):
    """Build the summary record for one row.

    The case number is the fifth space-separated token of the span text
    (index 4); an ``IndexError`` propagates when fewer tokens exist.
    ``type``, ``title`` and ``court_name`` are copied from the row's
    ``category``, ``title`` and ``court`` entries (``None`` when absent).
    """
    case_no = chongqing_article(items)[4]
    return {
        'type': items.get('category'),
        'title': items.get('title'),
        'court_name': items.get('court'),
        'case_no': case_no
    }


if __name__ == '__main__':
    # Manual smoke run: print the summary record of a single row.
    mb = MysqlBase(connecter)
    for item in mb._execute("select * from judge_doc_chongqing limit 1;"):
        summary = chongqing_list(item)
        print(summary)
def xinjiang_list(items):
    """Build the summary record for one row.

    The row's ``'content'`` markup is cleaned with three ``re.sub`` passes
    and parsed with ``pq``; the first space-separated token of the combined
    ``<p>`` text is used as the case number.  ``title`` and ``court_name``
    are copied from the row's ``title`` and ``court`` entries (``None`` when
    absent).
    """
    # The three substitutions are reproduced exactly as found in the original.
    markup = re.sub(' ', '', items['content'])
    markup = re.sub(' ', '', markup)
    markup = re.sub(' ', '', markup)
    paragraphs = pq(markup)('p')
    first_token = paragraphs.text().split(' ')[0]
    return {
        'title': items.get('title'),
        'court_name': items.get('court'),
        'case_no': first_token
    }


# Connection settings handed to MysqlBase below.
connecter = {
    'host': '10.1.1.40',
    'user': '******',
    'password': '******',
    'db': 'judge_doc'
}

if __name__ == '__main__':
    # Manual smoke run: print the article lines and summary of a single row.
    mb = MysqlBase(connecter)
    for item in mb._execute("select * from judge_doc_xinjiang limit 1;"):
        article_lines = xinjiang_article(item)
        print('\n'.join(article_lines))
        print(xinjiang_list(item))
path='http://10.1.1.28:9200/judge_doc/local_doc', data=obj) if is_exists(url='http://10.1.1.28:9200/judge_doc/total_doc', field='case_no', value=obj['case_no']): ines(id=obj['instrument_id'], path='http://10.1.1.28:9200/judge_doc/total_doc', data=obj) if __name__ == '__main__': mb = MysqlBase(connecter) for i in range(1): for items in mb._execute( "select * from judge_doc_hubei where id > {} limit 30".format( 220)): try: print(items['id']) l = hubei_list(items) a = hubei_article(items) ws = WenshuBase('\n'.join(a[3:])) litigants, agents = litigants_agent_extract('\n'.join( ws.role_paragraph)) court_officers = court_extract('\n'.join(ws.court_paragraph)) reasons = reason_extract( reason_description=ws.reason_description, title=l.get('title'), trial_type=l.get('type'))
'河北省高级人民法院网-执行信息-限制招投标', '河北省高级人民法院网-执行信息-限制高消费人', '河南法院诉讼服务网-执行信息-未结执行实施案件', '新疆法院诉讼服务网-执行信息-失信被执行人', '新疆法院诉讼服务网-执行信息-被执行人', '上海市高级人民法院网-执行信息-曝光台', '上海市高级人民法院网-执行信息-网上追查', '上海市高级人民法院网-执行信息-限制出境', '上海市高级人民法院网-执行信息-限制高消费', '浙江法院公开网 - 执行信息 - 曝光台(个人)', '浙江法院公开网 - 执行信息 - 限制出境', '浙江法院公开网 - 执行信息 - 限制招投标', '浙江法院公开网 - 执行信息 - 限制高消费' ] mb = MysqlBase(connecter) a = -1 while 1: a = a + 1 for data_path in data_paths: if '四川' in data_path: table = 'zhixing' for items in mb._execute( "select * from zhixing where data_path='{}' order by id limit {},10000;" .format(data_path, a * 10000)): try: run(items) mb._update( "UPDATE {} SET is_process = {} WHERE id= '{}'". format(table, items['id']), 100) except: mb._update( "UPDATE {} SET is_process = {} WHERE id= '{}'". format(table, items['id'], 200)) pass else: for items in mb._execute( "select * from zhixing_no_detail where data_path='{}' order by id limit {},10000;" .format(data_path, a * 10000)):
ines(id=obj['instrument_id'], path='http://10.1.1.28:9200/judge_doc/local_doc', data=obj) if is_exists(url='http://10.1.1.28:9200/judge_doc/total_doc', field='case_no', value=obj['case_no']): ines(id=obj['instrument_id'], path='http://10.1.1.28:9200/judge_doc/total_doc', data=obj) if __name__ == '__main__': mb = MysqlBase(connector) for i in range(1): for items in mb._execute( "select * from judge_doc_qinghai limit 1".format()): print(items['id']) try: l = qinghai_list(items) a = qinghai_article(items) ws = WenshuBase('\n'.join(a[3:])) litigants, agents = litigants_agent_extract('\n'.join( ws.role_paragraph)) court_officers = court_extract('\n'.join(ws.court_paragraph)) type = type_extract(l.get('title')) reasons = reason_extract( reason_description=ws.reason_description, title=l.get('title'), trial_type=type) trial_date = trial_date_extract(''.join(ws.court_paragraph))
ines(id=obj['instrument_id'], path='http://10.1.1.28:9200/judge_doc/local_doc', data=obj) if is_exists(url='http://10.1.1.28:9200/judge_doc/total_doc', field='case_no', value=obj['case_no']): ines(id=obj['instrument_id'], path='http://10.1.1.28:9200/judge_doc/total_doc', data=obj) if __name__ == '__main__': mb = MysqlBase(connector) for i in range(1): for items in mb._execute( "select * from judge_doc_guizhou limit 1000".format()): print(items['id']) try: l = guizhou_list(items) a = guizhou_article(items) ws = WenshuBase('\n'.join(a[3:])) litigants, agents = litigants_agent_extract('\n'.join( ws.role_paragraph)) court_officers = court_extract('\n'.join(ws.court_paragraph)) type = type_extract(l.get('title')) reasons = reason_extract( reason_description=ws.reason_description, title=l.get('title'), trial_type=type) trial_date = trial_date_extract(''.join(ws.court_paragraph)) court_level = court_level_extract(l.get('court_name'))
return dd.text().split(' ') def liaoning_list(items): content = items['content'] content = re.sub(' ', '', content) content = re.sub(' ', '', content) d = pq(content) dd = d('p') case_no = dd.text().split(' ')[0] obj = { 'title': items.get('title'), 'court_name': items.get('court'), 'case_no': case_no } return obj connecter = { 'host': '10.1.1.40', 'user': '******', 'password': '******', 'db': 'judge_doc' } if __name__ == '__main__': mb = MysqlBase(connecter) for item in mb._execute("select * from judge_doc_liaoning limit 1;"): print('\n'.join(liaoning_article(item))) print(liaoning_list(item))
ines(id=obj['instrument_id'], path='http://10.1.1.28:9200/judge_doc/local_doc', data=obj) if is_exists(url='http://10.1.1.28:9200/judge_doc/total_doc', field='case_no', value=obj['case_no']): ines(id=obj['instrument_id'], path='http://10.1.1.28:9200/judge_doc/total_doc', data=obj) if __name__ == '__main__': mb = MysqlBase(connector) for i in range(1): for items in mb._execute( "select * from judge_doc_fujian limit 10".format()): print(items['id']) try: l = fujian_list(items) a = fujian_article(items) ws = WenshuBase('\n'.join(a[2:])) litigants, agents = litigants_agent_extract('\n'.join( ws.role_paragraph)) court_officers = court_extract('\n'.join(ws.court_paragraph)) reasons = reason_extract( reason_description=ws.reason_description, title=l.get('title'), trial_type=l.get('type')) trial_date = trial_date_extract(''.join(ws.court_paragraph)) court_level = court_level_extract(l.get('court_name'))
path='http://10.1.1.28:9200/judge_doc/local_doc', data=obj) if is_exists(url='http://10.1.1.28:9200/judge_doc/total_doc', field='case_no', value=obj['case_no']): ines(id=obj['instrument_id'], path='http://10.1.1.28:9200/judge_doc/total_doc', data=obj) if __name__ == '__main__': mb = MysqlBase(connector) for i in range(1): for items in mb._execute( "select * from wenshu_beijing where id = {} limit 1".format( 337)): try: print(items['id']) l = beijing_list(items) a = beijing_article(items) ws = WenshuBase('\n'.join(a[2:])) litigants, agents = litigants_agent_extract('\n'.join( ws.role_paragraph)) court_officers = court_extract('\n'.join(ws.court_paragraph)) reasons = reason_extract( reason_description=ws.reason_description, title=l.get('title'), trial_type=l.get('type'), reason=l.get('reason'))
def guizhou_list(items):
    """Build the summary record for one row.

    ``items['detail_response']`` is a JSON document whose ``'data'`` entry
    holds the HTML.  After cleaning, the combined ``<div>`` text is split on
    spaces: token 0 is taken as the court name and token 2 as the case
    number (``IndexError`` propagates when there are fewer tokens).  The
    title is the first ``title="..."`` attribute found in
    ``items['list_response']``, or ``''`` when there is none.
    """
    markup = json.loads(items['detail_response'])['data']
    markup = markup.replace('\\', '')
    # Drop inline style attributes up to the closing '>' of the tag.
    markup = re.sub(' style.*?>', '>', markup)
    # The two substitutions below are reproduced exactly as found.
    markup = re.sub(' ', '', markup)
    markup = re.sub(' ', '', markup)

    divs = pq('<HTML>' + markup + '</html>')('div')
    tokens = divs.text().split(' ')
    court = tokens[0]
    case_no = tokens[2]

    found_titles = re.findall('title="(.*?)"', items['list_response'])
    if found_titles:
        title = found_titles[0]
    else:
        title = ''
    return {'title': title, 'court_name': court, 'case_no': case_no}


# Connection settings handed to MysqlBase below.
connecter = {
    'host': '10.1.1.40',
    'user': '******',
    'password': '******',
    'db': 'judge_doc'
}

if __name__ == '__main__':
    # Manual smoke run: print the article lines and summary of a single row.
    mb = MysqlBase(connecter)
    for item in mb._execute("select * from judge_doc_guizhou limit 1;"):
        article_lines = guizhou_article(item)
        print('\n'.join(article_lines))
        print(guizhou_list(item))
def zhejiang_article(items):
    """Return the text of every ``<span>`` in the stored page, split on spaces.

    The first ``<html ...>...</html>`` match in ``items['detail_response']``
    is extracted (``IndexError`` if there is none), stripped of style
    attributes, ``<meta>`` tags and paragraph joins, then parsed with ``pq``.
    """
    detail_response = items['detail_response']
    html = re.findall('(<html.*?</html>)', detail_response)[0]
    # Order matters: each substitution works on the output of the previous one.
    html = re.sub('style.*?>', '>', html)
    html = re.sub('<html.*?>', '<html>', html)
    html = re.sub('<meta.*?>', '', html)
    html = re.sub(' >', '>', html)
    html = re.sub('</p><p>', '', html)
    html = re.sub(' ', '', html)
    html = re.sub(' ', '', html)
    return pq(html)('span').text().split(' ')


def zhejiang_list(items):
    """Build the summary record from the row's ``list_response`` string.

    ``list_response`` is the textual form of a dict.  The record maps
    ``type``/``court_name``/``case_no`` to its ``AJLB``/``CourtName``/``AH``
    entries (``None`` when a key is missing).
    """
    # Imported locally so the fix does not depend on the file's import block.
    import ast
    import json

    raw = items['list_response']
    if isinstance(raw, bytes):
        raw = raw.decode('utf-8')
    # SECURITY: this used to be eval(raw). The string comes from stored
    # crawler output, so evaluating it as code allowed arbitrary execution.
    # literal_eval accepts only Python literals; JSON is tried as a fallback
    # for payloads that use null/true/false.
    try:
        record = ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        record = json.loads(raw)
    # .get() returns the same value setdefault() did (None when absent)
    # without inserting the missing keys into the parsed dict.
    return {
        'type': record.get('AJLB'),
        'court_name': record.get('CourtName'),
        'case_no': record.get('AH')
    }


# Module-level demo run, kept unguarded as in the original.
mb = MysqlBase(connecter)
for items in mb._execute("select * from wenshu_zhejiang1 limit 1"):
    print(zhejiang_list(items))
    print(zhejiang_article(items))
content('p').remove() announcer, ann_date = content('span').text().split(' ') content('span').remove() c = content.text() i = c.find(':') defendant_origin = c[:i] defendants = c[:i].strip().split('、') defendants.append(defendant) ann_content = c[i + 1:].strip() dd = re.findall('(\d+)年(\d+)月(\d+)日', ann_date)[0] for d in defendants: yield { 'ann_type': ann_type, 'announcer': announcer, 'defendant': d, 'defendant_origin': defendant_origin, 'ann_date': date(year=int(dd[0]), month=int(dd[1]), day=int(dd[2])).isoformat(), 'ann_content': ann_content, 'ann_html': article } mb = MysqlBase(connecter) for item in mb._execute("select * from sh_sdgg where is_process = 0"): article = items['detail'] for e in extract_fygg(article): id = get_md5(e['ann_type']) + get_md5(e['defendant']) + get_md5(e['ann_date']) e['id'] = id ines(id=id, path='http://10.1.1.28:9200/court_announcement/court_announcement', data=e)
return dd.text().split(' ') def jilin_list(items): content = items['content'] content = re.sub(' ', '', content) content = re.sub(' ', '', content) d = pq(content) dd = d('p') case_no = dd.text().split(' ')[0] obj = { 'title': items.get('title'), 'court_name': items.get('court'), 'case_no': case_no } return obj connecter = { 'host': '10.1.1.40', 'user': '******', 'password': '******', 'db': 'judge_doc' } if __name__ == '__main__': mb = MysqlBase(connecter) for item in mb._execute("select * from judge_doc_jilin limit 1;"): print('\n'.join(jilin_article(item))) print(jilin_list(item))
return dd.text().split(' ') def qinghai_list(items): content = items['content'] content = re.sub(' ', '', content) content = re.sub(' ', '', content) d = pq(content) dd = d('p') case_no = dd.text().split(' ')[0] obj = { 'title': items.get('title'), 'court_name': items.get('court'), 'case_no': case_no } return obj connecter = { 'host': '10.1.1.40', 'user': '******', 'password': '******', 'db': 'judge_doc' } if __name__ == '__main__': mb = MysqlBase(connecter) for item in mb._execute("select * from judge_doc_qinghai limit 1;"): # print('\n'.join(qinghai_list(item))) print(qinghai_list(item))
def hubei_list(items):
    """Build the summary record for one row.

    The title is the slice of ``items['content']`` from index 2 up to the
    first ``.htm";`` marker.  The remaining text is cleaned, cut at the
    first ``<BODY>`` and parsed with ``pq``; from the combined ``<div>``
    text, token 0 is the court name and token 2 the case number
    (``IndexError`` propagates when there are fewer tokens).  ``type`` is
    the row's ``category`` with ``文书`` replaced by ``案件``.
    """
    raw = items['content']
    title = raw[2:raw.find('.htm";')]

    # NOTE(review): as written, the next two substitutions replace a
    # character with itself. Possibly HTML entities were intended -- confirm
    # against the real file before touching them.
    body = re.sub('<', '<', raw)
    body = re.sub('>', '>', body)
    body = re.sub('style=.*?>', '>', body)
    body = re.sub(' >', '>', body)
    body = re.sub(' ', '', body)
    # If '<BODY>' is absent, find() returns -1 and only the last character
    # of the string survives this slice.
    body = body[body.find('<BODY>'):]
    body = re.sub('<BODY>;', '', body)
    body = re.sub(' ', '', body)
    body = re.sub('\u3000', '', body)

    tokens = pq('<HTML>' + body)('div').text().split(' ')
    case_no = tokens[2]
    court = tokens[0]
    return {
        'type': re.sub('文书', '案件', items.get('category')),
        'title': title,
        'court_name': court,
        'case_no': case_no
    }


if __name__ == '__main__':
    # Manual smoke run: print the article lines of the row with id 5.
    mb = MysqlBase(connecter)
    for item in mb._execute("select * from judge_doc_hubei where id = 5;"):
        article_lines = hubei_article(item)
        print('\n'.join(article_lines))
from IKEA.shixin.config import connecter as connecter
import requests
from datetime import date
from datetime import timedelta


def robot(content):
    """Post *content* as a text message to the DingTalk robot webhook.

    Returns the response body as text.
    """
    payload = {"msgtype": "text", "text": {"content": content}}
    # NOTE(review): the access token is embedded in the URL; consider moving
    # it to configuration.
    response = requests.post(
        'https://oapi.dingtalk.com/robot/send?access_token=ed2a4f043112a17e542d226e551b58aa4dc4a0399a4701465553ece93429dd49',
        json=payload)
    return response.text


# NOTE(review): `yesterday` is computed but never used; the count query
# below filters on the fixed date '2017-08-30' instead. Confirm which one
# is intended.
yesterday = (date.today() - timedelta(days=1)).isoformat()
mb = MysqlBase(connecter)
table = 'zhixing_no_detail'
j = {"msgtype": "text", "text": {"content": ""}}
content = ''

# One report line per data_path: "<data_path> <count since date> <total>".
for items in mb._execute(
        "select `data_path` ,count(`data_path` ) as c from `{}` group by `data_path` "
        .format(table)):
    data_path = items['data_path']
    total = items['c']
    for num in mb._execute(
            "select count(*) as c from {} where create_time>'{}' and data_path='{}'"
            .format(table, '2017-08-30', data_path)):
        content += '{} {} {}\n'.format(data_path, num['c'], total)

# Send the accumulated report once all data paths have been counted.
robot(content)