def Web_Api_Error(Judgmentid):
    """WEB API endpoint (GET): run every consistency check for one judgment.

    Looks up the base record by id, loads the matching raw-data row
    (Open or Otraw depending on the source url), re-analyses the stored
    full text and returns the checker's error dict with ``status`` 0,
    or a ``{'Error': ..., 'status': <0}`` dict when a lookup fails.
    """
    Session_to = returns_session('Judgment')
    session = Session_to()
    result = ot_judge_base
    # NOTE(review): string-built filter is SQL-injectable if Judgmentid
    # comes from an untrusted client -- should use a bound parameter.
    result_data = session.query(result).filter('id = %s' % Judgmentid).first()
    if not result_data:
        return {'Error': u'没有找到文书ID的相应记录', 'status': -3}
    # Pick the raw-data table this record was scraped into.
    if 'openlaw' in result_data.url:
        to = Open
    else:
        to = Otraw
    try:
        data = session.query(to).filter(
            'id = %s' % result_data.parent_id).first()
    except SQLAlchemyError:
        return {'Error': u'相关的详细数据', 'status': -4}
    finally:
        session.close()
    if not data:
        return {'Error': u'没有找到相关的记录', 'status': -4}
    analy = Analyse()
    analy.text = data.content_all
    _header, part, content, case_sign = analy.split_to_four_parts()
    ero = Judgment_checking(_header, part, content, case_sign)
    #: check the cause of action
    ero.Checking_anyou(result_data.anyou.decode('utf8'),
                       result_data.type.decode('utf8'))
    #: check plaintiff / defendant
    ero.Checking_people(result_data.plaintiff.decode('utf8'),
                        result_data.defendant.decode('utf8'))
    #: check plaintiff / defendant lawyers
    ero.Checking_lawyer(result_data.plaintiff_lawyers.decode('utf8'),
                        result_data.defendant_lawyers.decode('utf8'))
    #: check the signature block
    ero.Checking_sign()
    #: check the area information
    ero.Checking_area(result_data.areacode,
                      result_data.department.decode('utf8'))
    #: check the trial department
    ero.Checking_department(result_data.department.decode('utf8'))
    if ero.errors:
        # BUGFIX: set the status key only when errors actually exist.
        # The original assigned it unconditionally *before* this check,
        # which made `if ero.errors` always truthy and the no-error
        # branch unreachable (Web_Api_On_Check does it correctly).
        ero.errors['status'] = 0
        return ero.errors
    return {'Error': u'没有错误信息', 'status': 0}
def to_ot_process_error(self, old, todat): """此方法用于写库,如有需求写入新库可以调用此方法。 """ print old.id, old.url olds = Otraw print "[Analysis to %s] time: %s " % (todat, time.strftime('%y-%m-%d %H-%M-%S')) if ('openlaw' in old.url): self.point.set_tablename(Open) self.point.set_filter(filter='id = %s' % old.parent_id, limit=1) old_data = self.point.query() else: self.point.set_tablename(olds) self.point.set_filter(filter='id = %s' % old.parent_id, limit=1) old_data = self.point.query() if not old_data: return old_data = old_data[0] analy = Analyse() analy.text = old_data.content_all _header, part, content, case_sign = analy.split_to_four_parts() #: 开始检测数据错误 ero = Judgment_checking(_header, part, content, case_sign) #: 检查案由 ero.Checking_anyou(old.anyou) #: 检查原告被告 ero.Checking_people(old.plaintiff, old.defendant) #: 检查原告被告律师 ero.Checking_lawyer(old.plaintiff_lawyers, old.defendant_lawyers) #: 检查署名信息 ero.Checking_sign() if ero.errors: # return list(set(ero.errors)) for item in ero.errors: new = ot_process_error() new.judge_id = old.id new.action = item[0] new.error = item[1] new.user_name = 'System' new.addtime = str(int(time.time())) point = insert_database( 'inspection', tablename=todat, editor=new) point.update() return
def fuzzy_analyse(self, old_data):
    """Analyse a raw record: split its text and guess all structured fields.

    Returns a list of:
    ``[(_header, part, content, case_sign), (plaintiff, plaintiff_lawyers),
    (defendant, defendant_lawyers), case_sign_key, head_key,
    replace_data, end_time]``
    """
    analyzer = Analyse()
    analyzer.text = old_data.content_all
    _header, part, content, case_sign = analyzer.split_to_four_parts()
    # A header under 4 lines is treated as broken: rebuild it from the
    # first six text lines and strip the date/court/number labels.
    if len(_header.split('\n')) < 4:
        rebuilt = "\n".join(analyzer.text_in_lines[0:6])
        _header = re.sub(u'日期:|法院:|案号:', '', rebuilt)
    clients_attr, lawyers_attr = analyzer.guess_clients_lawyers(
        part.split('\n'))
    case_sign_key = analyzer.guess_case_sign(case_sign.split('\n'))
    head_key = analyzer.guess_header_types(_header.split('\n'))
    # De-duplicate the guessed parties and lawyers on both sides.
    for side in (u'原告', u'被告'):
        clients_attr[side] = list(set(clients_attr[side]))
        lawyers_attr[side] = list(set(lawyers_attr[side]))
    end_time = analyzer.guess_end_date(case_sign)
    replace_data = analyzer._replace_data(part)
    # Serialize each side as ';'-joined "name:...:..." strings,
    # empty string when nothing was found.
    plaintiff = (';'.join(u"%s:%s:%s" % client
                          for client in clients_attr[u'原告'])
                 if clients_attr[u'原告'] else '')
    defendant = (';'.join(u"%s:%s:%s" % client
                          for client in clients_attr[u'被告'])
                 if clients_attr[u'被告'] else '')
    plaintiff_lawyers = (';'.join(u"%s:%s" % lawyer
                                  for lawyer in lawyers_attr[u'原告'])
                         if lawyers_attr[u'原告'] else '')
    defendant_lawyers = (';'.join(u"%s:%s" % lawyer
                                  for lawyer in lawyers_attr[u'被告'])
                         if lawyers_attr[u'被告'] else '')
    return [(_header, part, content, case_sign),
            (plaintiff, plaintiff_lawyers),
            (defendant, defendant_lawyers),
            case_sign_key, head_key, replace_data, end_time]
def to_ot_rawdata_judgement_court_gov_cn_old(self, old, todat):
    """Re-parse one old court.gov.cn raw page and insert a parsed row.

    Extracts the judgment table from ``old.source_data``; when the
    stored HTML lacks the table, re-fetches the page through the PROXY
    list, stores the repaired source and returns early. Otherwise the
    text is split into four parts, the header/party/signature fields
    are guessed, and a new ot_rawdata_judgement_court_gov_cn_old row
    is inserted into the 'Judgment' database.
    """
    new = ot_rawdata_judgement_court_gov_cn_old()
    new.url = old.url
    new.referer = old.url
    analy = Analyse()
    try:
        raw_html = XPath(old.source_data).execute(
            '//*[@id="ws"]/table')[0].to_html()
    except IndexError:
        print '[Error] Analyse: url = %s' % new.url
        # Request Get: retry the download through each proxy until one succeeds.
        # NOTE(review): if PROXY is empty, `r` below is unbound -> NameError;
        # also the proxy URL format 'http:%s:59274' looks malformed (missing //).
        for item in PROXY:
            r = requests.get(
                new.url, proxies={'http': 'http:%s:59274' % item},
                timeout=30)
            if r.ok:
                break
        if not r.ok:
            raise Exception,\
                'Get faild url = %s' % old.url
        # Store the freshly downloaded source under the same id, then stop;
        # a later run is expected to parse the repaired source_data.
        to = old.__class__()
        to.id = old.id
        to.source_data = r.text
        raw_html = XPath(to.source_data).execute(
            '//*[@id="ws"]/table')[0].to_html()
        # NOTE(review): editor=new here (an almost-empty parsed row) with
        # tablename=to.__class__ looks suspicious -- probably meant
        # editor=to; confirm against insert_database's contract.
        point = insert_database(
            'Judgment', tablename=to.__class__, editor=new)
        point.update()
        return
    text = html_to_text(HTML_PARSER.unescape(raw_html))
    # Strip a leftover DOCTYPE fragment that survives html_to_text.
    try:
        text = re.sub('//W3C//DTD HTML 4.0 Transitional//EN\'>', '', text)
    except:
        pass
    analy.text = text
    new.content_all = analy.text
    _header, part, content, case_sign = analy.split_to_four_parts()
    new.clients_attr, new.lawyers_attr = analy.guess_clients_lawyers(
        part.split('\n'))
    end_date = analy.guess_end_date(case_sign)
    new.end_date = end_date
    case_sign_key = analy.guess_case_sign(case_sign.split('\n'))
    head_key = analy.guess_header_types(_header.split('\n'))
    new.content = part + content
    new.case_sign = case_sign
    # Copy the guessed header fields onto the new row.
    new.case_number = head_key['case_number']
    new.department = head_key['department']
    new.type = head_key['type']
    new.title = head_key['title']
    new.case_type = head_key['case_type']
    # Keep an already-set procedure; otherwise guess from the case number.
    new.procedure = new.procedure or analy.guess_procedure(new.case_number)
    new.replace_data = json.dumps(analy._replace_data(part))
    # Signature-block names: chief judge, acting judges, judges, clerks.
    new.chief_judge = ",".join(case_sign_key[u'审判长'])
    new.acting_judges = ",".join(case_sign_key[u'代理审判员'])
    new.judge = ",".join(case_sign_key[u'审判员'])
    new.clerk = ",".join(list(set(case_sign_key[u'书记员'])))
    new.input_time = arrow.now().timestamp
    # Disabled filter: skip rows without judges/law-firm lawyers.
    # if (not new.chief_judge and not new.judge and not new.acting_judges.strip()) or \
    #         (u'事务所' not in new.plaintiff_lawyers and u'事务所' not
    #          in new.defendant_lawyers):
    #     return
    new.parent_id = old.id
    print 'Runing String <ot_rawdata_judgement_court_gov_cn_old> parent_id = %s , url = %s' % (old.id, old.url)
    point = insert_database(
        'Judgment', tablename=ot_rawdata_judgement_court_gov_cn_old,
        editor=new)
    point.insert()
def Web_Api_On_Check(**keyword):
    """WEB API endpoint (POST): check caller-supplied fields for a judgment.

    Expects ``pid`` plus the candidate field values (anyou, plaintiff,
    defendant, the two lawyer lists, the four signature names and
    areacode) as keyword arguments; returns the checker's error dict
    with ``status`` 0, or a status dict when a lookup fails.
    """
    session = returns_session('Judgment')()
    record = session.query(ot_judge_base).filter(
        'id = %s' % keyword['pid']).first()
    if not record:
        return {'Error': u'没有找到文书ID的相应记录', 'status': -3}
    # Pick the raw-data table this record was scraped into.
    raw_table = Open if 'openlaw' in record.url else Otraw
    try:
        raw = session.query(raw_table).filter(
            'id = %s' % record.parent_id).first()
    except SQLAlchemyError:
        return {'Error': u'相关的详细数据', 'status': -4}
    finally:
        session.close()
    if not raw:
        return {'Error': u'没有找到相关的记录', 'status': -4}
    #: load the analyser and split the text into its four sections
    analy = Analyse()
    analy.text = raw.content_all
    _header, part, content, case_sign = analy.split_to_four_parts()
    checker = Judgment_checking(_header, part, content, case_sign)
    #: check the cause of action
    checker.Checking_anyou(keyword['anyou'], record.type)
    #: check plaintiff / defendant
    checker.Checking_people(keyword['plaintiff'], keyword['defendant'])
    #: check lawyers on both sides
    checker.Checking_lawyer(
        keyword['plaintiff_lawyers'], keyword['defendant_lawyers'])
    #: signature names posted by the caller; Checking_sign also works
    #: without them, but passing them gives a more precise judgement
    keys = {
        'chief_judge': keyword['chief_judge'],
        'judge': keyword['judge'],
        'acting_judges': keyword['acting_judges'],
        'clerk': keyword['clerk'],
    }
    checker.Checking_sign(keys)
    #: check the area information
    checker.Checking_area(
        keyword['areacode'], record.department.decode('utf8'))
    if checker.errors:
        # status=0 must be set here, inside the branch, so that callers
        # can still distinguish an empty result from a populated one
        checker.errors['status'] = 0
        return checker.errors
    return {'Error': u'没有错误信息', 'status': 0}