def amount_unsure(self):
    """Return a first estimate of the total amount charged by the prosecution.

    Only first-instance papers (``trial_level == 1``) are examined. Money
    amounts are extracted from the cleaned case-overview text
    (``first_basic_text``) and the largest one is returned.

    Returns:
        The largest extracted amount, or ``None`` when no amount is found or
        the paper is not first instance. Per the original author's note the
        value is a float expressed in units of 10,000 yuan.
    """
    if self.trial_level != 1:
        return None
    overview = functions.TextProcessor(self.first_basic_text).clean_text
    found = functions.TextProcessor(overview).extract_moneys()
    return max(found) if found else None
def num_of_facts(self):
    """Estimate the number of criminal facts from the dates in the fact text.

    Only first-instance papers (``trial_level == 1``) are examined. Every
    match of ``settings.pattern_num_of_facts`` contributes a year (group 1)
    and an optional month or season (group 2); each pair is folded into a
    six-digit integer ``YYYYMM`` and duplicates are discarded.

    Returns:
        ``None`` when the paper is not first instance or no date is found,
        ``1`` when exactly one distinct date is found, otherwise the number
        of distinct dates minus one.
    """
    if self.trial_level != 1:
        return None

    text = functions.TextProcessor(self.first_fact_text).clean_text
    # Seasons are mapped to a representative month; built once, not per match.
    season_to_month = str.maketrans({
        '春': '3',
        '夏': '6',
        '秋': '9',
        '冬': '12',
    })

    distinct_dates = set()
    for match in settings.pattern_num_of_facts.finditer(text):
        year_base = int(match.group(1)) * 100
        # group(2) is None when the month group did not take part in the
        # match; treat that the same as an empty month instead of crashing.
        month_str = (match.group(2) or '').translate(season_to_month)
        distinct_dates.add(
            year_base + int(month_str) if month_str else year_base)

    count = len(distinct_dates)
    if count == 0:
        return None
    return 1 if count == 1 else count - 1
def is_simple_procedure(self):
    """Tell whether the case was tried under the simplified procedure.

    The cleaned case-overview text must mention '简易程序' and must not
    mention '转为普通程序' (converted to the ordinary procedure).

    Returns:
        1 if the simplified procedure applies, otherwise 0.
    """
    overview = functions.TextProcessor(self.first_basic_text).clean_text
    mentioned = '简易程序' in overview
    converted = '转为普通程序' in overview
    return 1 if mentioned and not converted else 0
def job_info(self):
    """Return the post information of the offender or of the crime target.

    Extends ``self.defendant_info['job']`` for corruption and bribery cases.
    Only first-instance papers (``trial_level == 1``) are examined.

    The post name is taken from ``self.defendant_info['job']`` when present;
    otherwise it is searched with ``settings.pattern_job_info['job']`` in the
    fact text that precedes the first '证据' marker. The unit type is the
    first entry of ``settings.JOB_TYPES`` for which one of its keywords in
    ``settings.JOB_TYPE_DICT`` occurs in the post name.

    Returns:
        dict with keys ``'job'``, ``'job_type'`` and ``'job_grade'``. Values
        stay ``None`` when they cannot be determined; ``'job_grade'`` is
        never filled in by this method.
    """
    job_info = {'job': None, 'job_type': None, 'job_grade': None}
    if self.trial_level != 1:
        return job_info

    # Find the post name.
    job = self.defendant_info['job']
    if job is None:
        text = functions.TextProcessor(self.first_fact_text).clean_text
        # partition() keeps the whole text when '证据' is absent, whereas
        # text[:text.find('证据')] silently dropped the last character.
        text = text.partition('证据')[0]
        job_match = settings.pattern_job_info['job'].search(text)
        if job_match:
            job = job_match.group(1)
    if job is None:
        return job_info
    job_info['job'] = job

    # Determine the unit type.
    tail = job[-6:]  # slicing already copes with names shorter than 6 chars
    for job_type in settings.JOB_TYPES:
        # Types 'X' and 'S' (per the original comment: administrative organs,
        # public institutions and people's organisations) are matched against
        # the last six characters only; every other type against the full
        # post name.
        haystack = tail if job_type in ('X', 'S') else job
        if any(key in haystack for key in settings.JOB_TYPE_DICT[job_type]):
            job_info['job_type'] = job_type
            break
    return job_info
def is_plus_investigated(self):
    """Tell whether the case went through supplementary investigation.

    Only first-instance papers (``trial_level == 1``) are examined; the
    cleaned case-overview text is checked for the phrase '补充侦查'.

    Returns:
        int: 1 if the phrase is present, otherwise 0.
    """
    if self.trial_level != 1:
        return 0
    overview = functions.TextProcessor(self.first_basic_text).clean_text
    return 1 if '补充侦查' in overview else 0
def amount_sure(self):
    """Return a first estimate of the total amount confirmed by the court.

    Only first-instance papers (``trial_level == 1``) are examined. Amounts
    are looked for first in the court-opinion text up to the first '辩护'
    marker; if none is found there, in the fact text up to the first '证据'
    marker.

    Returns:
        The largest extracted amount, or ``None`` when nothing is found or
        the paper is not first instance. Per the original author's note the
        value is a float expressed in units of 10,000 yuan.
    """
    if self.trial_level != 1:
        return None

    # First look in the court's opinion. partition() keeps the whole text
    # when the marker is missing, whereas text[:text.find(...)] silently
    # dropped the last character (find() returns -1).
    opinion = functions.TextProcessor(self.first_opinion_text).clean_text
    moneys = functions.TextProcessor(
        opinion.partition('辩护')[0]).extract_moneys()

    # Fall back to the established facts.
    if not moneys:
        facts = functions.TextProcessor(self.first_fact_text).clean_text
        moneys = functions.TextProcessor(
            facts.partition('证据')[0]).extract_moneys()

    return max(moneys) if moneys else None
def is_delayed(self):
    """Tell whether the trial was postponed.

    Only first-instance papers (``trial_level == 1``) are examined; the
    cleaned case-overview text is searched with
    ``settings.pattern_is_delayed``.

    Returns:
        1 if the pattern matches, otherwise 0.
    """
    if self.trial_level != 1:
        return 0
    overview = functions.TextProcessor(self.first_basic_text).clean_text
    return 1 if settings.pattern_is_delayed.search(overview) else 0
def prosecute_number(self):
    """Return the indictment number quoted in the case overview.

    Only first-instance papers (``trial_level == 1``) are examined; the
    number is group 1 of ``settings.pattern_prosecute_number``.

    Returns:
        str or None: the captured number, or ``None`` when the pattern does
        not match or the paper is not first instance.
    """
    if self.trial_level != 1:
        return None
    overview = functions.TextProcessor(self.first_basic_text).clean_text
    found = settings.pattern_prosecute_number.search(overview)
    return found.group(1) if found else None
def is_bad_effect(self):
    """Tell whether the offence caused bad social impact or public loss.

    Only first-instance papers (``trial_level == 1``) are examined. The
    court-opinion text up to the first '辩护' marker is searched with
    ``settings.pattern_bad_effect``.

    Returns:
        1 if the pattern matches, otherwise 0.
    """
    if self.trial_level != 1:
        return 0
    opinion = functions.TextProcessor(self.first_opinion_text).clean_text
    # partition() keeps the whole text when '辩护' is absent, whereas
    # text[:text.find('辩护')] silently dropped the last character.
    opinion = opinion.partition('辩护')[0]
    return 1 if settings.pattern_bad_effect.search(opinion) else 0
def is_zishou(self):
    """Tell whether the court opinion mentions voluntary surrender ('自首').

    Only first-instance papers (``trial_level == 1``) are examined. Sentences
    listed in ``self.defensive_opinion_sentences`` are removed from the
    opinion text first, so that a mention made only by the defence does not
    count.

    Returns:
        1 if '自首' remains in the text, otherwise 0.
    """
    if self.trial_level != 1:
        return 0
    opinion = functions.TextProcessor(self.first_opinion_text).clean_text
    # Strip the defence's statements before looking for the keyword.
    for defence_sentence in self.defensive_opinion_sentences:
        opinion = opinion.replace(defence_sentence, '')
    return int('自首' in opinion)
def defensive_opinion_sentences(self):
    """Collect the court-opinion sentences that mention the defence.

    Only first-instance papers (``trial_level == 1``) are examined, and only
    when the first opinion sentence contains '本院认为'. Every sentence that
    contains '辩护' is kept, in document order.

    Returns:
        tuple[str, ...]: the matching sentences, or an empty tuple.
    """
    if self.trial_level != 1:
        return ()
    sentences = functions.TextProcessor(self.first_opinion_text).sentences
    if not sentences or '本院认为' not in sentences[0]:
        return ()
    return tuple(sentence for sentence in sentences if '辩护' in sentence)
def money_usage(self):
    """Return how the illicit money was used, as stated in the court opinion.

    Applies only to first-instance papers (``trial_level == 1``) whose cause
    is '贪污罪', '受贿罪' or '挪用公款罪'. The value is group 1 of
    ``settings.pattern_money_usage``.

    Returns:
        The captured text, or ``None`` when out of scope or not found.
    """
    in_scope = (self.trial_level == 1
                and self.cause in ('贪污罪', '受贿罪', '挪用公款罪'))
    if not in_scope:
        return None
    opinion = functions.TextProcessor(self.first_opinion_text).clean_text
    found = settings.pattern_money_usage.search(opinion)
    return found.group(1) if found else None
def is_suohui(self):
    """Tell whether the bribe was solicited ('索贿').

    Applies only to first-instance papers (``trial_level == 1``) whose cause
    is '受贿罪'. The court-opinion text up to the first '辩护' marker is
    checked for the keyword.

    Returns:
        1 if '索贿' is present, 0 if it is not, and ``None`` when the paper
        is out of scope (other trial level or other cause).
    """
    if self.trial_level != 1 or self.cause != '受贿罪':
        return None
    opinion = functions.TextProcessor(self.first_opinion_text).clean_text
    # partition() keeps the whole text when '辩护' is absent, whereas
    # text[:text.find('辩护')] silently dropped the last character.
    opinion = opinion.partition('辩护')[0]
    return 1 if '索贿' in opinion else 0
def is_tuizang(self):
    """Tell whether the illicit gains were returned.

    Applies only to first-instance papers (``trial_level == 1``) whose cause
    is '贪污罪' or '受贿罪'. The court-opinion text up to the first '辩护'
    marker is checked for the character '退'.

    Returns:
        1 if '退' is present, 0 if it is not, and ``None`` when the paper is
        out of scope (other trial level or other cause).
    """
    if self.trial_level != 1 or self.cause not in ('贪污罪', '受贿罪'):
        return None
    opinion = functions.TextProcessor(self.first_opinion_text).clean_text
    # partition() keeps the whole text when '辩护' is absent, whereas
    # text[:text.find('辩护')] silently dropped the last character.
    opinion = opinion.partition('辩护')[0]
    # The single character is meant to cover 退回 退赃 退缴 退清 退出 退交
    # 退还 退赔 退完.
    # NOTE(review): it also matches unrelated words such as 退休 - confirm
    # whether that is acceptable before tightening the check.
    return 1 if '退' in opinion else 0
def is_seek_promote(self):
    """Tell whether the bribe concerned someone's promotion ('提拔').

    Applies only to first-instance papers (``trial_level == 1``) whose cause
    is '受贿罪'. The court-opinion text up to the first '辩护' marker is
    checked for the keyword.

    Returns:
        1 if '提拔' is present, 0 if it is not, and ``None`` when the paper
        is out of scope (other trial level or other cause).
    """
    if self.trial_level != 1 or self.cause != '受贿罪':
        return None
    opinion = functions.TextProcessor(self.first_opinion_text).clean_text
    # partition() keeps the whole text when '辩护' is absent, whereas
    # text[:text.find('辩护')] silently dropped the last character.
    opinion = opinion.partition('辩护')[0]
    return 1 if '提拔' in opinion else 0
def prosecutors(self):
    """Return the names of the public prosecutors listed in the overview.

    Only first-instance papers (``trial_level == 1``) are examined. Group 1
    of ``settings.pattern_prosecutors`` is split on '、' and every piece is
    cleaned with ``settings.pattern_prosecutors_delete_strings``.

    Returns:
        list[str]: the cleaned names, or an empty list when the pattern does
        not match or the paper is not first instance.
    """
    if self.trial_level != 1:
        return []
    overview = functions.TextProcessor(self.first_basic_text).clean_text
    found = settings.pattern_prosecutors.search(overview)
    if not found:
        return []
    return [
        settings.pattern_prosecutors_delete_strings.sub('', raw_name)
        for raw_name in found.group(1).split('、')
    ]
def is_punished_by_party_admin(self):
    """Tell whether the defendant has a prior party or administrative sanction.

    Applies only to first-instance papers (``trial_level == 1``) whose cause
    is '贪污罪' or '受贿罪'. The court-opinion text up to the first '辩护'
    marker is searched with ``settings.pattern_punished_by_party_admin``.

    Returns:
        1 if the pattern matches, 0 if it does not, and ``None`` when the
        paper is out of scope (other trial level or other cause).
    """
    if self.trial_level != 1 or self.cause not in ('贪污罪', '受贿罪'):
        return None
    opinion = functions.TextProcessor(self.first_opinion_text).clean_text
    # partition() keeps the whole text when '辩护' is absent, whereas
    # text[:text.find('辩护')] silently dropped the last character.
    opinion = opinion.partition('辩护')[0]
    found = settings.pattern_punished_by_party_admin.search(opinion)
    return 1 if found else 0
def is_special_money(self):
    """Tell whether the offence involved specially earmarked funds.

    Applies only to first-instance papers (``trial_level == 1``) whose cause
    is '贪污罪' or '挪用公款罪'. The court-opinion text up to the first
    '辩护' marker is searched with ``settings.pattern_special_money``.

    Returns:
        1 if the pattern matches, 0 if it does not, and ``None`` when the
        paper is out of scope (other trial level or other cause).
    """
    if self.trial_level != 1 or self.cause not in ('贪污罪', '挪用公款罪'):
        return None
    opinion = functions.TextProcessor(self.first_opinion_text).clean_text
    # partition() keeps the whole text when '辩护' is absent, whereas
    # text[:text.find('辩护')] silently dropped the last character.
    opinion = opinion.partition('辩护')[0]
    return 1 if settings.pattern_special_money.search(opinion) else 0
def is_tanbai(self):
    """Tell whether the defendant confessed.

    A truthy ``self.is_zishou`` (voluntary surrender) always counts as a
    confession. Otherwise, for first-instance papers (``trial_level == 1``),
    the court-opinion text with the defence's sentences removed is searched
    with ``settings.pattern_tanbai``. Per the original note the pattern
    covers the wordings 坦白, 认罪, 如实供述 and 交代.

    Returns:
        1 if a confession is detected, otherwise 0.
    """
    if self.is_zishou:
        return 1
    if self.trial_level != 1:
        return 0
    opinion = functions.TextProcessor(self.first_opinion_text).clean_text
    # Strip the defence's statements before searching.
    for defence_sentence in self.defensive_opinion_sentences:
        opinion = opinion.replace(defence_sentence, '')
    return 1 if settings.pattern_tanbai.search(opinion) else 0
def is_punished_by_criminal_law(self):
    """Tell whether the defendant has a prior criminal record.

    Applies only to first-instance papers (``trial_level == 1``) whose cause
    is '贪污罪' or '受贿罪'. When ``self.is_leifan == 1`` the answer is 1
    without reading the text; otherwise the court-opinion text up to the
    first '辩护' marker is searched with
    ``settings.pattern_punished_by_criminal_law``.

    Returns:
        1 if a prior record is detected, 0 if it is not, and ``None`` when
        the paper is out of scope (other trial level or other cause).
    """
    if self.trial_level != 1 or self.cause not in ('贪污罪', '受贿罪'):
        return None
    if self.is_leifan == 1:
        return 1
    opinion = functions.TextProcessor(self.first_opinion_text).clean_text
    # partition() keeps the whole text when '辩护' is absent, whereas
    # text[:text.find('辩护')] silently dropped the last character.
    opinion = opinion.partition('辩护')[0]
    found = settings.pattern_punished_by_criminal_law.search(opinion)
    return 1 if found else 0
def to_html(self, html_path):
    """Write the paper to an HTML file, paragraph by paragraph.

    The file is opened for writing as UTF-8 and receives the HTML head
    template (with ``{title}`` replaced by the paper id), a header built from
    jid, cause, title, case number and court, one pair of ``<p>`` elements
    per entry of ``self.all_paragraphs`` (items 0 and 1 as the label, the
    cleaned item 3 as the text), and finally the HTML tail template.

    Args:
        html_path: destination file; the original note requires an absolute
            path.

    Returns:
        Always 0. A completion message is printed to stdout.
    """
    header_template = (
        """<p>{0} {1}</p>\n<p>{2}</p>\n<p>{3}</p>\n<p>{4}</p>\n""")
    paragraph_template = "<p>{0}.{1}</p>\n<p>{2}</p>\n"

    with open(html_path, 'w', encoding='utf-8') as out:
        out.write(
            settings.html_template_head.replace('{title}',
                                                str(self.paper_id)))
        # Format the header items for output.
        dumper = functions.ItemDumper(self.jid, self.cause, self.title,
                                      self.case_number, self.court)
        jid, cause, title, case_number, court = dumper.format()
        out.write(
            header_template.format(jid, cause, title, case_number, court))
        for paragraph in self.all_paragraphs:
            cleaned = functions.TextProcessor(paragraph[3]).clean_text
            out.write(
                paragraph_template.format(paragraph[0], paragraph[1],
                                          cleaned))
        out.write(settings.html_template_tail)
    print('to html finished at paper_id: {}'.format(self.paper_id))
    return 0
def gongfan(self):
    """Return the defendant's role in a joint offence.

    Only first-instance papers (``trial_level == 1``) are examined, after the
    defence's sentences have been removed from the court-opinion text. Per
    the original note this is only meant for single-defendant judgments.

    Returns:
        0 when ``settings.pattern_gongfan['no_zhucong']`` matches (no
        principal/accessory distinction), 1 when group 1 of
        ``settings.pattern_gongfan['zhucong']`` is '主' (principal), 2 when
        it is '从' (accessory), and ``None`` otherwise.
    """
    if self.trial_level != 1:
        return None
    opinion = functions.TextProcessor(self.first_opinion_text).clean_text
    # Strip the defence's statements before searching.
    for defence_sentence in self.defensive_opinion_sentences:
        opinion = opinion.replace(defence_sentence, '')

    if settings.pattern_gongfan['no_zhucong'].search(opinion):
        return 0
    role_match = settings.pattern_gongfan['zhucong'].search(opinion)
    if not role_match:
        return None
    return {'主': 1, '从': 2}.get(role_match.group(1))
def penalty(self):
    """Extract the sentencing result from the first-instance judgment text.

    Only first-instance papers (``trial_level == 1``) are examined; for any
    other level ``None`` is returned. The text used is the part of the
    cleaned ``first_judge_text`` before its first space.

    Returns:
        ``None``, or a dict with the keys:

        * ``'many'``     - number of matches of the ``'many'`` pattern
          (number of offences); ``None`` if there is no match, in which case
          every other key is ``None`` as well.
        * ``'freedom'``  - principal penalty: negative number for 'juyi',
          positive number for 'youqitx', the matched text for 'wuqitx' and
          'sixing', 0 if nothing matched.
        * ``'property'`` - property penalty: ``'全部'``, a positive amount
          (fine only), a negative amount (confiscation only), a
          ``'±x.xx'`` string (both), or 0.0.
        * ``'right'``    - deprivation of political rights: ``'终身'``, a
          period number, or 0.
        * ``'delay'``    - probation period number, or 0.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been lost - confirm against the upstream file,
    especially the position of ``break`` and of the final 'free' check.
    """
    penalty = None
    if self.trial_level == 1:
        text = functions.TextProcessor(
            self.first_judge_text).clean_text.split(' ')[0]
        penalty = {
            'many': None,
            'freedom': None,
            'property': None,
            'right': None,
            'delay': None
        }
        # many: determine the number of offences
        many_strings = settings.pattern_penalty['many'].findall(text)
        if many_strings:  # Without an offence count the sentence is considered unreliable and nothing else is extracted; with one, the other fields default to 0 instead of None
            penalty = {
                'many': len(many_strings),
                'freedom': 0,
                'property': 0.0,
                'right': 0,
                'delay': 0
            }
            text = settings.pattern_penalty['split'].split(text)[
                -1]  # keep only the last piece: the sentence finally executed
            # freedom: principal penalty
            for k, v in settings.pattern_penalty['freedom'].items():
                freedom_match = v.search(text)
                if freedom_match:
                    if k == 'juyi':  # short-term detention is stored as a negative number
                        penalty[
                            'freedom'] = -functions.TextProcessor.period2num(
                                freedom_match.group(1))
                    elif k == 'youqitx':  # fixed-term imprisonment is stored as a positive number
                        penalty[
                            'freedom'] = functions.TextProcessor.period2num(
                                freedom_match.group(1))
                    elif k in ('wuqitx', 'sixing'):  # life imprisonment / death penalty: store the matched text
                        penalty['freedom'] = freedom_match.group(0)
                    break
            # property: property penalty
            if '全部' in text:  # confiscation of all personal property is stored as the literal string
                penalty['property'] = '全部'
            else:
                fajin_match = settings.pattern_penalty['property'][
                    'fajin'].search(text)
                moshou_match = settings.pattern_penalty['property'][
                    'moshou'].search(text)
                fajin_money = functions.TextProcessor(fajin_match.group(
                    1)).extract_moneys() if fajin_match else None
                moshou_money = functions.TextProcessor(
                    moshou_match.group(
                        1)).extract_moneys() if moshou_match else None
                if fajin_money and moshou_money:  # both fine and confiscation: sum the amounts and prefix with '±'
                    penalty['property'] = '±{0:.2f}'.format(
                        fajin_money[0] + moshou_money[0])
                elif fajin_money:  # fine only: positive value
                    penalty['property'] = fajin_money[0]
                elif moshou_money:  # confiscation only: negative value
                    penalty['property'] = -moshou_money[0]
            # right: deprivation of political rights
            if '政治权利终身' in text:  # lifelong deprivation is stored as the literal string
                penalty['right'] = '终身'
            else:  # otherwise look for a concrete duration
                right_match = settings.pattern_penalty['right'].search(
                    text)
                if right_match:
                    penalty['right'] = functions.TextProcessor.period2num(
                        right_match.group(1))
            # delay: probation
            delay_match = settings.pattern_penalty['delay'].search(text)
            if delay_match:
                penalty['delay'] = functions.TextProcessor.period2num(
                    delay_match.group(1))
            # free: exemption from punishment or acquittal
            free_match = settings.pattern_penalty['free'].search(text)
            if free_match:  # reset every penalty to 0, keeping the offence count
                penalty = {
                    'many': penalty['many'],
                    'freedom': 0,
                    'property': 0.0,
                    'right': 0,
                    'delay': 0
                }
    return penalty
def is_designated(self):
    """Tell whether the case was tried under designated jurisdiction.

    The flag is raised whenever the substring '管辖' appears in the cleaned
    case-overview text.

    Returns:
        1 if the substring is present, otherwise 0.
    """
    overview = functions.TextProcessor(self.first_basic_text).clean_text
    return 1 if '管辖' in overview else 0
def defendant_info(self):
    """Return a dict describing the defendant.

    The information is read from the cleaned ``litigant_info_text`` of
    first-instance papers (``trial_level == 1``). The text is split on
    spaces and the second piece, up to and including its first '。', is
    taken as the defendant sentence. If that sentence cannot be isolated, or
    no name is found, the defaults are returned unchanged.

    Returns:
        dict with the keys:

        * ``'name'`` - names from ``self.litigants`` joined with '+', or the
          name found in the sentence (discarded when 10 or more characters).
        * ``'is_name_covered'`` - 1 if the name contains '某' or any
          character outside U+4E00..U+9FFF, else 0.
        * ``'sex'`` - 1 for ',男', 0 for ',女'.
        * ``'birth'`` / ``'age'`` - first extracted birth date and
          ``judge_date.year - birth.year``; the age falls back to an
          explicitly stated age.
        * ``'tribe'`` - ethnic group, default '汉族'.
        * ``'is_minor'`` - 1 when the ethnic group is not '汉族', else 0.
        * ``'educated'`` - value looked up in ``settings.EDUCATED_DICT``
          (per the original note: 1 primary school, 2 junior high, 3 senior
          high / technical secondary, 4 junior college, 5 university,
          6 postgraduate).
        * ``'job'`` - post captured from the sentence.
    """
    text = functions.TextProcessor(self.litigant_info_text).clean_text
    defendant_info = {
        'name': None,
        'is_name_covered': None,
        'sex': None,
        'birth': None,
        'age': None,
        'tribe': '汉族',
        'is_minor': 0,
        'educated': None,
        'job': None
    }
    if not text or self.trial_level != 1:
        return defendant_info

    # Isolate the sentence that carries the defendant's details.
    text_split = text.split(' ')
    if len(text_split) < 2:
        return defendant_info  # the sentence cannot be located reliably
    # partition() keeps the whole piece when there is no '。', whereas
    # slicing with find() silently dropped its last character.
    defendant_text = text_split[1].partition('。')[0] + '。'
    patterns = settings.pattern_defendant

    # name, is_name_covered
    if self.litigants:  # prefer the names already known from litigants
        defendant_info['name'] = '+'.join(self.litigants)
    else:  # otherwise search the sentence
        name_match = patterns['name'].search(defendant_text)
        if name_match:
            name = name_match.group(1)
            defendant_info['name'] = name if len(name) < 10 else None
    name = defendant_info['name']
    if not name:
        # Without a name the sentence is considered defective: stop here and
        # return the defaults.
        return defendant_info
    if '某' in name or functions.TextProcessor(name).check_exist(
            r'[^\u4e00-\u9fff]'):
        defendant_info['is_name_covered'] = 1
    else:
        defendant_info['is_name_covered'] = 0

    # sex
    if ',男' in defendant_text:
        defendant_info['sex'] = 1
    elif ',女' in defendant_text:
        defendant_info['sex'] = 0

    # birth, age
    birth_match = patterns['birth'].search(defendant_text)
    if birth_match:
        dates = functions.TextProcessor(
            birth_match.group(0)).extract_dates()
        if dates:
            defendant_info['birth'] = dates[0]
            defendant_info['age'] = (
                self.judge_date.year - defendant_info['birth'].year
                if self.judge_date else None)
    if not defendant_info['age']:  # some judgments state the age directly
        age_match = patterns['age'].search(defendant_text)
        if age_match:
            defendant_info['age'] = int(age_match.group(1))

    # tribe, is_minor
    tribe_match = patterns['tribe'].search(defendant_text)
    if tribe_match:
        defendant_info['tribe'] = tribe_match.group(1)
    if defendant_info['tribe'] != '汉族':
        defendant_info['is_minor'] = 1

    # educated: the first pattern that matches wins
    educated_name = None
    for pattern_educated in patterns['educated']:
        educated_match = pattern_educated.search(defendant_text)
        if educated_match:
            educated_name = educated_match.group(1)
            break
    if educated_name:
        for e_key, e_level in settings.EDUCATED_DICT.items():
            if e_key in educated_name:
                defendant_info['educated'] = e_level
                break

    # job
    job_match = patterns['job'].search(defendant_text)
    if job_match:
        defendant_info['job'] = job_match.group(1)

    return defendant_info