#: 载入案由配置以及地区配置 from configure import anyou_replace, Area_duct, case_mode, OFFICE #from StringIO import StringIO #from hashlib import md5 from configure import PROXY #: 载入基础模块 import time import json import re import arrow import sys import requests reload(sys) sys.setdefaultencoding('utf-8') actions_id, actions, replace, anyou_alias = anyou_replace('Judgment') area = Area_duct() class JudgmentAnalysis(Process): def to_ot_rawdata_judgement_court_gov_cn_old(self, old, todat): new = ot_rawdata_judgement_court_gov_cn_old() new.url = old.url new.referer = old.url analy = Analyse() try: raw_html = XPath(old.source_data).execute( '//*[@id="ws"]/table')[0].to_html()
#: from pybamboo import Bamboo #: 载入基础模块 import time import json import sys import arrow import re reload(sys) sys.setdefaultencoding('utf-8') #: 定制全局变量 AREA = Area_duct() ACTION_ID, ACTIONS, REPLACE, ANYOU_ALIAS = anyou_replace('Judgment') class JudgmentAnalysis(Process): """继承程序运行模块,开始对任务进行分析 """ def fuzzy_analyse(self, old_data): "分析方法,开始对数据进行分析" analy = Analyse() analy.text = old_data.content_all _header, part, content, case_sign = analy.split_to_four_parts()
def _select_anyou(case_kind, title, first_line, actions):
    """Choose the cause of action (anyou) for a judgment.

    For administrative cases (case_kind == u"行政") the title is searched
    first and the first line of the content is the fallback; for every other
    kind the order is reversed.  A hit on the title yields the longest
    matching name (on equal length the later entry of ``actions`` wins); a hit
    on the first line yields the first matching name.

    Returns the chosen name, or None when nothing in ``actions`` matches.
    """

    def longest(names):
        # Keyed by length so that a later name of equal length replaces an
        # earlier one, exactly as the original length->name dict did.
        by_length = dict((len(name), name) for name in names)
        return by_length[max(by_length)]

    if case_kind == u"行政":
        in_title = [name for name in actions if name in title]
        if in_title:
            return longest(in_title)
        in_first_line = [name for name in actions if name in first_line]
        return in_first_line[0] if in_first_line else None

    in_first_line = [name for name in actions if name in first_line]
    if in_first_line:
        return in_first_line[0]
    in_title = [name for name in actions if name in title]
    return longest(in_title) if in_title else None


def insert_base(old, Update_id=0):
    """Build an ``ot_judge_base`` row from a parsed judgment and store it.

    Args:
        old: parsed judgment record.  The function reads its ``content``,
            ``content_all``, ``case_sign``, ``case_type``, ``case_number``,
            ``title``, ``department``, ``end_date``, ``chief_judge``,
            ``judge``, ``acting_judges``, ``clerk``, ``clients_attr``,
            ``lawyers_attr``, ``procedure``, ``referer``, ``url``,
            ``replace_data`` and ``id`` attributes.
        Update_id: when non-zero, the id of the existing row to update;
            when 0 (default) a new row is inserted.

    Returns:
        None.  When a validation step fails a hint is written to
        ``__output__`` and the function returns without touching the database.
    """
    area = Area_duct()
    # Only the id map and the list of known causes of action are used here.
    actions_id, actions, _, _ = anyou_replace("Judgment")
    new = ot_judge_base()

    # Mandatory fields: give up as soon as one of them is missing.
    for attr in ("content", "case_sign", "case_type", "department", "end_date"):
        if getattr(old, attr) is None:
            print >> __output__, u"【提示】%s 字段为空,请检查数据" % attr
            return

    # Normalise the document type to one of the configured modes; when
    # several modes match, the last one in case_mode wins.
    for mode in case_mode:
        if mode in old.case_type:
            new.case_type = mode
    if not new.case_type:
        print >> __output__, u"【提示】文书字号为空"
        return

    if old.content == "":
        # The unicode prefix used to sit inside the quotes, so the message
        # was logged with a stray leading "u".
        print >> __output__, u"你所访问的数据为空"

    # Wrap every line of the text in its own <p> element.
    new.content = "<p>" + "</p><p>".join(old.content.split("\n")) + "</p>"
    new.content_md5 = md5(new.content.encode("utf8")).hexdigest()
    new.case_sign = "<p>" + "</p><p>".join(old.case_sign.split("\n")) + "</p>"
    new.case_number = old.case_number

    # The case kind is the document type minus its last three characters.
    new.type = new.case_type[:-3]
    # Arbitration is filed under civil cases.
    if new.type == u"仲裁":
        new.type = u"民事"

    new.title = old.title
    if not new.title:
        new.title = old.content_all.split("\n")[0]

    # Resolve the cause of action.
    anyou = _select_anyou(
        new.type, new.title, old.content.split("\n")[0], actions
    )
    if not anyou:
        # Previously an empty match list reached ``anyou.strip()`` and raised
        # AttributeError; report it like the other validation failures.
        print >> __output__, u"【提示】未匹配到案由"
        return
    new.anyou_id = actions_id[anyou.strip()]
    new.anyou = anyou

    new.department = old.department
    new.chief_judge = old.chief_judge
    new.judge = old.judge
    if old.acting_judges:
        new.acting_judges = old.acting_judges
    else:
        new.acting_judges = "无"
    new.clerk = old.clerk

    # Parties and lawyers are flattened to ";"-separated, ":"-joined tuples.
    new.plaintiff = ";".join(
        u"%s:%s:%s" % client for client in old.clients_attr[u"原告"]
    )
    new.plaintiff_lawyers = ";".join(
        u"%s:%s" % lawyer for lawyer in old.lawyers_attr[u"原告"]
    )
    print >> __plai_people__, u"原告:%s, 律师:%s" % (
        new.plaintiff,
        new.plaintiff_lawyers,
    )
    new.defendant = ";".join(
        u"%s:%s:%s" % client for client in old.clients_attr[u"被告"]
    )
    new.defendant_lawyers = ";".join(
        u"%s:%s" % lawyer for lawyer in old.lawyers_attr[u"被告"]
    )
    print >> __defan_people__, u"被告:%s, 律师:%s" % (
        new.defendant,
        new.defendant_lawyers,
    )

    new.procedure = old.procedure
    new.end_date = arrow.get(old.end_date, "Asia/Shanghai").timestamp

    # Region lookup based on the court name.
    area_item = area.ident(new.department.encode("gbk"))
    # NOTE(review): areacode is only stored when the lookup reports
    # "timed out" under the key "staut" -- this looks inverted and/or
    # misspelled; confirm against Area_duct.ident before changing it.
    if area_item and area_item.get("staut") == "timed out":
        new.areacode = area_item["areano"]

    new.url = old.referer
    print >> __infomation__, old.case_sign
    # All replacement data is stored as-is; an earlier, disabled variant
    # dropped keys containing anonymisation markers (某, X, x, *) first.
    new.replace_data = json.dumps(old.replace_data)
    new.input_time = arrow.now().timestamp

    # Reject rows with no judge signature at all or with no law firm on
    # either side.
    # NOTE(review): acting_judges defaults to "无" above, so the signature
    # half of this test can only be true for a whitespace-only
    # old.acting_judges -- confirm that this is intended.
    if (
        not new.chief_judge
        and not new.judge
        and not new.acting_judges.strip()
    ) or (
        u"事务所" not in new.plaintiff_lawyers
        and u"事务所" not in new.defendant_lawyers
    ):
        print >> __output__, u"不存在事务所或者署名信息"
        return

    print(old.id)
    if Update_id != 0:
        # Updating an existing row: reuse its id and keep the record's own URL.
        new.id = Update_id
        new.url = old.url
        point = insert_database("Judgment", tablename=ot_judge_base, editor=new)
        point.update()
    else:
        point = insert_database("Judgment", tablename=ot_judge_base, editor=new)
        point.insert()