Example #1
0
def mining(items):
    """Build a document record dict from a raw ``items`` mapping.

    Fields present in ``items`` are copied through; fields that are missing
    are derived from the title or, when article text is available, from the
    parsed document body (``WenshuBase``).

    Args:
        items: Mapping with the scraped fields. ``articles`` is expected to
            be the string form of a list of paragraph strings (for example
            ``"['para 1', 'para 2']"``); an already-decoded list or tuple is
            accepted as well.

    Returns:
        dict: The assembled record. Body-derived keys (``litigants``,
        ``agents``, ``court_officers``, ``court_level``, ``content``,
        ``reasons``, ``verdict``, ``trial_date``, ``litigation_request``,
        ``claim``) are only present when ``articles`` is non-empty.

    Raises:
        ValueError, SyntaxError: If ``articles`` is a string that is not a
            valid Python literal.
    """
    # Local import keeps this block self-contained.
    import ast

    obj = {}

    raw_articles = items.get('articles')
    if isinstance(raw_articles, bytes):
        raw_articles = raw_articles.decode('utf-8')
    if not raw_articles:
        articles = []
    elif isinstance(raw_articles, str):
        # SECURITY: this used to be eval(), which executes arbitrary code
        # embedded in the scraped data. literal_eval only accepts Python
        # literals (lists, strings, numbers, ...), which is all a serialized
        # paragraph list needs.
        articles = ast.literal_eval(raw_articles)
    else:
        articles = list(raw_articles)
    article = '\n'.join(articles)
    title = items.get('title')

    # Required fields
    obj['case_no'] = items.get('case_no', '')
    obj['publish_date'] = items.get('publish_date')
    obj['court_name'] = items.get('court_name', '')
    obj['source'] = items.get('source')
    obj['title'] = items.get('title', '')
    obj['update_time'] = update_time()
    obj['org_url'] = items.get('org_url')

    # May be absent from the input; fall back to extracting them from the title.
    obj['type'] = items.get('type') or type_extract(title)
    obj['trial_round'] = items.get('trial_round') or trial_round_extract(title)

    content_type = items.get('content_type')
    obj['content_type'] = (content_type_extract(content_type=content_type)
                           if content_type else None)
    reason = items.get('reason')
    trial_date = items.get('trial_date')

    if articles:
        ws = WenshuBase(article)
        litigants, agents = litigants_agent_extract(
            '\n'.join(ws.role_paragraph))
        court_officers = court_extract('\n'.join(ws.court_paragraph))
        litigation_request, claim = claim_extract(ws.claims_paragraphs)
        # Prefer the supplied trial date; otherwise extract it from the
        # court paragraph.
        trial_date = trial_date or trial_date_extract(ws.court_paragraph)
        court_level = court_level_extract(obj.get('court_name'))

        obj['litigants'] = litigants
        obj['agents'] = agents
        obj['court_officers'] = court_officers
        obj['court_level'] = court_level
        obj['content'] = '<br>'.join(articles)
        # NOTE(review): when a content_type was supplied, this overwrites the
        # value produced by content_type_extract(content_type=...) above with
        # the raw input value -- confirm that is intended.
        obj['content_type'] = content_type or content_type_extract(
            verdict=ws.verdict_paragraph, title=obj.get('title'))
        obj['reasons'] = reason_extract(ws.reason_description,
                                        obj.get('title'), obj.get('type'),
                                        reason)
        obj['verdict'] = ws.verdict
        obj['trial_date'] = trial_date
        obj['litigation_request'] = litigation_request
        obj['claim'] = claim

    # The record id is the MD5 of the title concatenated with the MD5 of the
    # pbracket()-processed case number.
    obj['instrument_id'] = get_md5(obj.get('title')) + get_md5(
        pbracket(obj.get('case_no')))
    return obj
Example #2
0
def process_ws(items):
    """Parse one Heilongjiang record and store the resulting document.

    The record is split into list metadata (``heilongjiang_list``) and article
    lines (``heilongjiang_article``). Element 4 of the article lines is used
    as the case number and the lines from index 5 onward as the document body.
    The assembled document is always written to ``judge_doc/local_doc`` and,
    when a document with the same ``case_no`` already exists in
    ``judge_doc/total_doc``, written there as well.

    Args:
        items: Raw record passed to ``heilongjiang_list`` and
            ``heilongjiang_article``.

    Returns:
        None. The function works through the side effects of ``ines``.
    """
    local_doc_url = 'http://10.1.1.28:9200/judge_doc/local_doc'
    total_doc_url = 'http://10.1.1.28:9200/judge_doc/total_doc'

    listing = heilongjiang_list(items)
    article_lines = heilongjiang_article(items)
    body_lines = article_lines[5:]
    title = listing.get('title')

    ws = WenshuBase('\n'.join(body_lines))
    litigants, agents = litigants_agent_extract('\n'.join(ws.role_paragraph))
    court_officers = court_extract('\n'.join(ws.court_paragraph))
    # Named trial_type rather than `type` to avoid shadowing the builtin.
    trial_type = type_extract(title)
    reasons = reason_extract(reason_description=ws.reason_description,
                             title=title,
                             trial_type=trial_type)
    trial_date = trial_date_extract(''.join(ws.court_paragraph))
    court_level = court_level_extract(listing.get('court_name'))
    trial_round = trial_round_extract(title)
    content_type = content_type_extract(ws.verdict_paragraph, title)
    case_no = article_lines[4]

    # Claims are only extracted for first-instance judgments whose reasons
    # include level-2 code 104 or 105. The extraction runs at most once, no
    # matter how many reasons match.
    # NOTE(review): mining() in this file unpacks claim_extract()'s result as
    # (litigation_request, claim); here the whole return value is stored under
    # 'claim' -- confirm which shape is intended.
    claim = ''
    if content_type == '判决书' and trial_round == '一审':
        if any(r['reason_code_level2'] in (104, 105) for r in reasons):
            claim = claim_extract(ws.claims_paragraphs)

    obj = {
        'case_no': pbracket(case_no),
        'reasons': reasons,
        'source': '黑龙江市高级人民法院',
        'type': trial_type,
        'title': pbracket(title),
        'content': '<br>'.join(body_lines),
        'agents': agents,
        'update_time': update_time(),
        'litigants': litigants,
        'content_type': content_type,
        'trial_round': trial_round,
        'court_level': court_level,
        'verdict': ws.verdict,
        'trial_date': trial_date,
        'court_officers': court_officers,
        'court_name': listing.get('court_name'),
        'claim': claim,
        'operator': 'leifeng',
        'instrument_id': get_md5(title) + get_md5(pbracket(case_no)),
    }

    ines(id=obj['instrument_id'], path=local_doc_url, data=obj)
    # NOTE(review): total_doc is only written when a document with the same
    # case_no already exists there -- confirm this is not meant to be
    # `not is_exists(...)`.
    if is_exists(url=total_doc_url, field='case_no', value=obj['case_no']):
        ines(id=obj['instrument_id'], path=total_doc_url, data=obj)
Example #3
0
                "select * from judge_doc_qinghai  limit 1".format()):

            print(items['id'])
            try:
                l = qinghai_list(items)
                a = qinghai_article(items)
                ws = WenshuBase('\n'.join(a[3:]))
                litigants, agents = litigants_agent_extract('\n'.join(
                    ws.role_paragraph))
                court_officers = court_extract('\n'.join(ws.court_paragraph))
                type = type_extract(l.get('title'))
                reasons = reason_extract(
                    reason_description=ws.reason_description,
                    title=l.get('title'),
                    trial_type=type)
                trial_date = trial_date_extract(''.join(ws.court_paragraph))
                court_level = court_level_extract(l.get('court_name'))
                trial_round = trial_round_extract(l.get('title'))
                content_type = content_type_extract(ws.verdict_paragraph,
                                                    l.get('title'))
                case_no = l.get('case_no')
                claim = ''
                if content_type == '判决书' and trial_round == '一审':
                    for reason in reasons:
                        if reason['reason_code_level2'] == 104 or reason[
                                'reason_code_level2'] == 105:
                            claim = claim_extract(ws.claims_paragraphs)
                obj = {
                    'case_no':
                    pbracket(l.get('case_no')),
                    'reasons':