예제 #1
0
def Web_Api_Error(Judgmentid):
    """Web API endpoint (GET method): run all consistency checks on one judgment.

    :param Judgmentid: primary key of the ``ot_judge_base`` row to check.
    :returns: a dict of detected errors with ``status`` 0, a
        ``{'Error': ..., 'status': 0}`` dict when no error was found, or a
        dict with a negative ``status`` when the records cannot be located.
    """
    Session_to = returns_session('Judgment')
    session = Session_to()
    result = ot_judge_base

    # NOTE(review): string-interpolated filter is open to SQL injection if
    # Judgmentid comes from the request — prefer a bound parameter
    # (e.g. filter_by(id=Judgmentid)).
    result_data = session.query(result).filter('id = %s' % Judgmentid).first()
    if not result_data:
        return {'Error': u'没有找到文书ID的相应记录', 'status': -3}

    # Pick the raw-data table that holds the original document text.
    if 'openlaw' in result_data.url:
        to = Open
    else:
        to = Otraw
    try:
        data = session.query(to).filter(
            'id = %s' % result_data.parent_id).first()
    except SQLAlchemyError:
        return {'Error': u'相关的详细数据', 'status': -4}
    finally:
        session.close()
    if data:
        analy = Analyse()
        analy.text = data.content_all
        _header, part, content, case_sign = analy.split_to_four_parts()
        ero = Judgment_checking(_header, part, content, case_sign)

        #: check the cause of action
        ero.Checking_anyou(result_data.anyou.decode('utf8'),
                           result_data.type.decode('utf8'))

        #: check plaintiff / defendant
        ero.Checking_people(result_data.plaintiff.decode('utf8'),
                            result_data.defendant.decode('utf8'))

        #: check plaintiff / defendant lawyers
        ero.Checking_lawyer(result_data.plaintiff_lawyers.decode('utf8'),
                            result_data.defendant_lawyers.decode('utf8'))

        #: check the signature block
        ero.Checking_sign()

        #: check the region information
        ero.Checking_area(result_data.areacode,
                          result_data.department.decode('utf8'))

        #: check the court (department) name
        ero.Checking_department(result_data.department.decode('utf8'))

        if ero.errors:
            # Bug fix: set 'status' only AFTER the truthiness test.
            # Previously it was assigned first, which made `ero.errors`
            # always truthy and the "no errors" branch unreachable.
            # This now matches the handling in Web_Api_On_Check.
            ero.errors['status'] = 0
            return ero.errors
        else:
            return {'Error': u'没有错误信息', 'status': 0}
    else:
        return {'Error': u'没有找到相关的记录', 'status': -4}
예제 #2
0
    def to_ot_process_error(self, old, todat):
        """此方法用于写库,如有需求写入新库可以调用此方法。

        """

        print old.id, old.url
        olds = Otraw
        print "[Analysis to %s] time: %s " % (todat, time.strftime('%y-%m-%d %H-%M-%S'))
        if ('openlaw' in old.url):
            self.point.set_tablename(Open)
            self.point.set_filter(filter='id = %s' % old.parent_id, limit=1)

            old_data = self.point.query()

        else:
            self.point.set_tablename(olds)
            self.point.set_filter(filter='id = %s' % old.parent_id, limit=1)

            old_data = self.point.query()
        if not old_data:
            return

        old_data = old_data[0]

        analy = Analyse()
        analy.text = old_data.content_all

        _header, part, content, case_sign = analy.split_to_four_parts()

        #: 开始检测数据错误
        ero = Judgment_checking(_header, part, content, case_sign)

        #: 检查案由
        ero.Checking_anyou(old.anyou)

        #: 检查原告被告
        ero.Checking_people(old.plaintiff, old.defendant)

        #: 检查原告被告律师
        ero.Checking_lawyer(old.plaintiff_lawyers, old.defendant_lawyers)

        #: 检查署名信息
        ero.Checking_sign()

        if ero.errors:
            # return list(set(ero.errors))
            for item in ero.errors:
                new = ot_process_error()
                new.judge_id = old.id
                new.action = item[0]
                new.error = item[1]
                new.user_name = 'System'
                new.addtime = str(int(time.time()))
                point = insert_database(
                    'inspection', tablename=todat, editor=new)
                point.update()
        return
예제 #3
0
    def fuzzy_analyse(self, old_data):
        """Fuzzy analysis: split the stored text into its four sections
        and extract parties, lawyers, signature and header fields."""
        parser = Analyse()
        parser.text = old_data.content_all

        _header, part, content, case_sign = parser.split_to_four_parts()
        # A header shorter than four lines was probably mis-split; fall
        # back to the first six raw lines of the document instead.
        if len(_header.split('\n')) < 4:
            _header = "\n".join(parser.text_in_lines[0:6])
        _header = re.sub(u'日期:|法院:|案号:', '', _header)

        clients_attr, lawyers_attr = parser.guess_clients_lawyers(
            part.split('\n'))
        case_sign_key = parser.guess_case_sign(case_sign.split('\n'))
        head_key = parser.guess_header_types(_header.split('\n'))

        # Drop duplicate entries for both sides in both attribute maps.
        for side in (u'原告', u'被告'):
            clients_attr[side] = list(set(clients_attr[side]))
            lawyers_attr[side] = list(set(lawyers_attr[side]))

        end_time = parser.guess_end_date(case_sign)
        replace_data = parser._replace_data(part)

        def _joined(entries, template):
            # ';'-join the entries rendered through template; '' if empty.
            if not entries:
                return ''
            return ';'.join(template % entry for entry in entries)

        plaintiff = _joined(clients_attr[u'原告'], u"%s:%s:%s")
        defendant = _joined(clients_attr[u'被告'], u"%s:%s:%s")
        plaintiff_lawyers = _joined(lawyers_attr[u'原告'], u"%s:%s")
        defendant_lawyers = _joined(lawyers_attr[u'被告'], u"%s:%s")

        return [(_header, part, content, case_sign),
                (plaintiff, plaintiff_lawyers),
                (defendant, defendant_lawyers),
                case_sign_key,
                head_key,
                replace_data,
                end_time]
예제 #4
0
    def to_ot_rawdata_judgement_court_gov_cn_old(self, old, todat):
        """Parse one court.gov.cn raw page into a structured
        ot_rawdata_judgement_court_gov_cn_old row and insert it.

        If the stored page lacks the expected ``#ws`` table, the page is
        re-fetched through the configured proxies, the refreshed source is
        saved, and the method returns without inserting a parsed row.

        :param old: raw-data record; reads .id, .url, .source_data.
        :param todat: kept for interface parity with the sibling ``to_*``
            writers; not used by this method.
        """
        new = ot_rawdata_judgement_court_gov_cn_old()
        new.url = old.url
        new.referer = old.url

        analy = Analyse()
        try:
            raw_html = XPath(old.source_data).execute(
                '//*[@id="ws"]/table')[0].to_html()
        except IndexError:
            print '[Error] Analyse: url = %s' % new.url

            # Request Get: try each proxy until one answers OK.
            r = None
            for item in PROXY:
                # Bug fix: proxy URL was 'http:%s:59274' (missing '//'),
                # which is not a valid proxy address for requests.
                r = requests.get(
                    new.url, proxies={'http': 'http://%s:59274' % item}, timeout=30)
                if r.ok:
                    break
            # Bug fix: guard against PROXY being empty, which previously
            # raised NameError on the unbound `r` instead of this error.
            if r is None or not r.ok:
                raise Exception('Get faild url = %s' % old.url)

            # Persist the freshly fetched source and stop: parsing is
            # retried on a later pass.
            to = old.__class__()
            to.id = old.id
            to.source_data = r.text
            raw_html = XPath(to.source_data).execute(
                '//*[@id="ws"]/table')[0].to_html()
            point = insert_database(
                'Judgment', tablename=to.__class__, editor=new)
            point.update()
            return
        text = html_to_text(HTML_PARSER.unescape(raw_html))
        try:
            # Strip a leftover DOCTYPE fragment if present.
            text = re.sub('//W3C//DTD HTML 4.0 Transitional//EN\'>', '', text)
        except Exception:  # narrowed from bare except; best-effort cleanup
            pass
        analy.text = text
        new.content_all = analy.text

        _header, part, content, case_sign = analy.split_to_four_parts()

        new.clients_attr, new.lawyers_attr = analy.guess_clients_lawyers(
            part.split('\n'))

        end_date = analy.guess_end_date(case_sign)
        new.end_date = end_date

        case_sign_key = analy.guess_case_sign(case_sign.split('\n'))
        head_key = analy.guess_header_types(_header.split('\n'))

        new.content = part + content

        new.case_sign = case_sign
        new.case_number = head_key['case_number']
        new.department = head_key['department']
        new.type = head_key['type']
        new.title = head_key['title']
        new.case_type = head_key['case_type']

        # Fall back to guessing the procedure from the case number.
        new.procedure = new.procedure or analy.guess_procedure(new.case_number)

        new.replace_data = json.dumps(analy._replace_data(part))

        new.chief_judge = ",".join(case_sign_key[u'审判长'])
        new.acting_judges = ",".join(case_sign_key[u'代理审判员'])
        new.judge = ",".join(case_sign_key[u'审判员'])
        new.clerk = ",".join(list(set(case_sign_key[u'书记员'])))

        new.input_time = arrow.now().timestamp

        new.parent_id = old.id
        print 'Runing String <ot_rawdata_judgement_court_gov_cn_old> parent_id = %s , url = %s' % (old.id, old.url)

        point = insert_database(
            'Judgment', tablename=ot_rawdata_judgement_court_gov_cn_old, editor=new)
        point.insert()
예제 #5
0
def Web_Api_On_Check(**keyword):
    """Web API endpoint (POST method): check caller-supplied fields
    against the stored judgment text.

    """
    session = returns_session('Judgment')()

    base_row = session.query(ot_judge_base).filter(
        'id = %s' % keyword['pid']).first()
    if not base_row:
        return {'Error': u'没有找到文书ID的相应记录', 'status': -3}

    source_table = Open if 'openlaw' in base_row.url else Otraw
    try:
        detail = session.query(source_table).filter(
            'id = %s' % base_row.parent_id).first()
    except SQLAlchemyError:
        return {'Error': u'相关的详细数据', 'status': -4}
    finally:
        session.close()

    if not detail:
        return {'Error': u'没有找到相关的记录', 'status': -4}

    #: load the analyser and feed it the stored full text
    analy = Analyse()
    analy.text = detail.content_all

    #: split the text into its four sections
    _header, part, content, case_sign = analy.split_to_four_parts()
    checker = Judgment_checking(_header, part, content, case_sign)

    #: check the cause of action
    checker.Checking_anyou(keyword['anyou'], base_row.type)

    #: check plaintiff / defendant
    checker.Checking_people(keyword['plaintiff'], keyword['defendant'])

    #: check plaintiff / defendant lawyers
    checker.Checking_lawyer(
        keyword['plaintiff_lawyers'], keyword['defendant_lawyers'])

    #: collect the posted signature fields into one dict
    keys = {
        'chief_judge': keyword['chief_judge'],
        'judge': keyword['judge'],
        'acting_judges': keyword['acting_judges'],
        'clerk': keyword['clerk'],
    }
    #: Checking_sign(keys) — keys is optional, but passing it yields a
    #: more precise judgment
    checker.Checking_sign(keys)

    #: check the region information
    checker.Checking_area(
        keyword['areacode'], base_row.department.decode('utf8'))

    if not checker.errors:
        return {'Error': u'没有错误信息', 'status': 0}
    # status=0 must be set here, inside the error branch — otherwise
    # callers cannot distinguish the two outcomes
    checker.errors['status'] = 0
    return checker.errors