예제 #1
0
def process_ws(items):
    """Mine a judgment document from ``items`` and pass it to ``ines``.

    The mined object is always sent to the ``local_doc`` path. It is sent to
    the ``total_doc`` path as well when ``is_exists`` returns a truthy value
    for the object's ``case_no``.

    Any ``Exception`` raised along the way is caught: a failure record that
    wraps the raw ``items`` and the error text is printed and sent to the
    ``fail_record`` path instead.
    """
    local_doc_path = 'http://10.1.1.28:9200/judge_doc/local_doc'
    total_doc_path = 'http://10.1.1.28:9200/judge_doc/total_doc'
    fail_record_path = 'http://10.1.1.28:9200/fail_record/fail_record'

    try:
        doc = mining(items)
        ines(id=doc['instrument_id'], path=local_doc_path, data=doc)

        found = is_exists(url=total_doc_path,
                          field='case_no',
                          value=doc['case_no'])
        if found:
            ines(id=doc['instrument_id'], path=total_doc_path, data=doc)
    except Exception as exc:
        # The failure id is built from the raw input's title and case number.
        title_digest = get_md5(items.get('title'))
        case_no_digest = get_md5(pbracket(items.get('case_no', '')))
        record_id = title_digest + case_no_digest

        failure = {
            "_reason_": str(exc),
            "data_size": len(items),
            "crawl_time": update_time(),
            "processed": False,
            "hostname": "worker1.yscredit.com",
            "data": items,
            "create_time": update_time(),
            "ip": "null",
            "_id_": record_id,
            "topic": "裁判文书"
        }
        print(failure)
        ines(id=record_id, path=fail_record_path, data=failure)
예제 #2
0
def process_ws(items):
    """Build a judgment-document record from ``items`` and pass it to ``ines``.

    ``heilongjiang_list`` supplies the listing fields (``title``,
    ``court_name``) and ``heilongjiang_article`` supplies a sequence whose
    element 4 is used as the case number and whose elements from index 5
    onward are used as the document body.

    The record is always sent to the ``local_doc`` path, and to the
    ``total_doc`` path as well when ``is_exists`` returns a truthy value for
    its ``case_no``.
    """
    listing = heilongjiang_list(items)
    article = heilongjiang_article(items)
    body_lines = article[5:]

    ws = WenshuBase('\n'.join(body_lines))
    litigants, agents = litigants_agent_extract('\n'.join(ws.role_paragraph))
    court_officers = court_extract('\n'.join(ws.court_paragraph))

    title = listing.get('title')
    trial_type = type_extract(title)
    reasons = reason_extract(reason_description=ws.reason_description,
                             title=title,
                             trial_type=trial_type)
    trial_date = trial_date_extract(''.join(ws.court_paragraph))
    court_name = listing.get('court_name')
    court_level = court_level_extract(court_name)
    trial_round = trial_round_extract(title)
    content_type = content_type_extract(ws.verdict_paragraph, title)
    case_no = article[4]

    # A claim is only extracted for first-instance judgments that have at
    # least one reason whose level-2 code is 104 or 105.
    claim = ''
    if content_type == '判决书' and trial_round == '一审':
        for reason in reasons:
            if reason['reason_code_level2'] in (104, 105):
                claim = claim_extract(ws.claims_paragraphs)

    record = {
        'case_no': pbracket(case_no),
        'reasons': reasons,
        'source': '黑龙江市高级人民法院',
        'type': trial_type,
        'title': pbracket(title),
        'content': '<br>'.join(body_lines),
        'agents': agents,
        'update_time': update_time(),
        'litigants': litigants,
        'content_type': content_type,
        'trial_round': trial_round,
        'court_level': court_level,
        'verdict': ws.verdict,
        'trial_date': trial_date,
        'court_officers': court_officers,
        'court_name': court_name,
        'claim': claim,
        'operator': 'leifeng',
        'instrument_id': get_md5(title) + get_md5(pbracket(case_no))
    }

    local_doc_path = 'http://10.1.1.28:9200/judge_doc/local_doc'
    total_doc_path = 'http://10.1.1.28:9200/judge_doc/total_doc'

    ines(id=record['instrument_id'], path=local_doc_path, data=record)
    already_listed = is_exists(url=total_doc_path,
                               field='case_no',
                               value=record['case_no'])
    if already_listed:
        ines(id=record['instrument_id'], path=total_doc_path, data=record)
예제 #3
0
파일: run_zy.py 프로젝트: chenliy/youshu
def process_ws(items):
    """Mine and tag a judgment document from ``items`` and pass it to ``ines``.

    The processed object is always sent to the ``local_doc`` path under
    ``es_path``. It is sent to the ``total_doc`` path as well when its
    ``source`` is ``'裁判文书网'`` or when ``is_exists`` returns a truthy value
    for its ``case_no``.

    Any ``Exception`` raised along the way is caught: a failure record that
    wraps the raw ``items`` and the error text is printed and sent to the
    ``fail_record`` path. Nothing is returned.
    """
    try:
        obj = tag(mining(items))
        ines(id=obj['instrument_id'],
             path='{}/judge_doc/local_doc'.format(es_path),
             data=obj)

        total_doc_path = '{}/judge_doc/total_doc'.format(es_path)
        # Short-circuit keeps the original behaviour: is_exists is only
        # consulted for documents whose source is not '裁判文书网'.
        if obj['source'] == '裁判文书网' or is_exists(url=total_doc_path,
                                                 field='case_no',
                                                 value=obj['case_no']):
            ines(id=obj['instrument_id'], path=total_doc_path, data=obj)

    except Exception as e:
        # The title can be missing or None (possibly the very reason mining
        # failed). Coerce it to a string so building the id cannot raise a
        # TypeError here and lose the failure record.
        title = str(items.get('title') or '')
        record_id = get_md5(title + str(update_time()))
        failure = {
            "_reason_": str(e),
            "data_size": len(items),
            "crawl_time": update_time(),
            "processed": False,
            "hostname": "worker1.yscredit.com",
            "data": items,
            "create_time": update_time(),
            "ip": "null",
            "_id_": record_id,
            "topic": "裁判文书"
        }
        print(failure)
        ines(id=record_id,
             path='{}/fail_record/fail_record'.format(es_path),
             data=failure)
예제 #4
0
                    trial_round,
                    'court_level':
                    court_level,
                    'verdict':
                    ws.verdict,
                    'trial_date':
                    trial_date,
                    'court_officers':
                    court_officers,
                    'court_name':
                    l.get('court_name'),
                    'claim':
                    claim,
                    'operator':
                    'leifeng',
                    'instrument_id':
                    get_md5(l.get('title')) +
                    get_md5(pbracket(l.get('case_no')))
                }

                ines(id=obj['instrument_id'],
                     path='http://10.1.1.28:9200/judge_doc/local_doc',
                     data=obj)
                if is_exists(url='http://10.1.1.28:9200/judge_doc/total_doc',
                             field='case_no',
                             value=obj['case_no']):
                    ines(id=obj['instrument_id'],
                         path='http://10.1.1.28:9200/judge_doc/total_doc',
                         data=obj)
            except e:
                print(e)
예제 #5
0
파일: run_t.py 프로젝트: chenliy/youshu
def process_ws(items):
    """Build a judgment-document record from ``items`` and pass it to ``ines``.

    Listing fields (``title``, ``case_no``, ``trial_round``, ``trial_date``)
    come from ``shanghai_list``; the trial type, court name and article text
    come from the other ``shanghai_*`` helpers. The article is parsed with
    ``WenshuBase`` and run through the extractor helpers to fill the rest of
    the record.

    The record is always sent to the ``local_doc`` path, and to the
    ``total_doc`` path as well when ``is_exists`` returns a truthy value for
    its ``case_no``.
    """
    listing = shanghai_list(items)
    trial_type = shanghai_trial_type(items)
    court_name = shanghai_court_name(items)
    article = shanghai_aricle(items)

    ws = WenshuBase(article)
    litigants, agents = litigants_agent_extract('\n'.join(ws.role_paragraph))
    court_officers = court_extract('\n'.join(ws.court_paragraph))

    title = listing['title']
    content_type = content_type_extract(ws.verdict_paragraph, title)
    reasons = reason_extract(reason_description=ws.reason_description,
                             title=title,
                             trial_type=trial_type)
    court_level = court_level_extract(court_name)

    # A claim is only extracted for first-instance judgments that have at
    # least one reason whose level-2 code is 104 or 105.
    claim = ''
    if content_type == '判决书' and listing['trial_round'] == '一审':
        for reason in reasons:
            if reason['reason_code_level2'] in (104, 105):
                claim = claim_extract(ws.claims_paragraphs)

    record = {
        'case_no': pbracket(listing['case_no']),
        'reasons': reasons,
        'source': '上海市高级人民法院',
        'type': trial_type,
        'title': pbracket(title),
        'content': re.sub('\n', '<br>', article),
        'agents': agents,
        'update_time': update_time(),
        'litigants': litigants,
        'content_type': content_type,
        'trial_round': listing['trial_round'],
        'court_level': court_level,
        'verdict': ws.verdict,
        'trial_date': listing['trial_date'],
        'court_officers': court_officers,
        'court_name': court_name,
        'claim': claim,
        'operator': 'leifeng',
        'instrument_id': get_md5(title) + get_md5(pbracket(listing['case_no']))
    }

    local_doc_path = 'http://10.1.1.28:9200/judge_doc/local_doc'
    total_doc_path = 'http://10.1.1.28:9200/judge_doc/total_doc'

    ines(id=record['instrument_id'], path=local_doc_path, data=record)
    already_listed = is_exists(url=total_doc_path,
                               field='case_no',
                               value=record['case_no'])
    if already_listed:
        ines(id=record['instrument_id'], path=total_doc_path, data=record)