def getPersonName(text=""):
    """Return the first person name extracted from ``text``, or ``""``.

    ``RegUtils.getPersonName`` is tried first; when it yields nothing,
    ``NlpUtils.get_per_list`` is used as a fallback.

    Args:
        text: The text to search for a person name.

    Returns:
        The first name found by either extractor, or the empty string when
        neither finds one.
    """
    names = RegUtils.getPersonName(text)
    if names:
        return names[0]
    # The fallback result has to be captured: without the assignment the
    # check below would re-test the (empty) first result and never fire.
    names = NlpUtils.get_per_list(text)
    if names:
        return names[0]
    return ""
def testGetJobs3(self):
    # reg.getJobsCurr on the str_text12 fixture must contain every title
    # listed below; the titles are checked in the order given.
    current_jobs = reg.getJobsCurr(self.str_text12)
    print(current_jobs)
    expected_titles = (
        "中共十九届中央委员",
        "国务委员",
        "公安部部长",
        "党委书记",
        "总警监",
        "国务院党组成员",
    )
    for title in expected_titles:
        self.assertIn(title, current_jobs)
def testResume(self):
    # Smoke test: run each extractor on the str_text fixture, collect the
    # results under one key per field and print them. Nothing is asserted.
    resume = {
        'volk': reg.getVolk(self.str_text),
        'edu': reg.getEducations(self.str_text),
        'alias': reg.getAliasName(self.str_text),
        'job': reg.getJobsCurr(self.str_text),
        'majors': reg.getMajors(self.str_text),
        'schools': reg.getSchools(self.str_text),
        'location': reg.getLocation(self.str_text),
    }
    print(resume)
def testGetLocation(self):
    # reg.getLocation on the str_text7 fixture must contain '浙江龙游'.
    locations = reg.getLocation(self.str_text7)
    print(locations)
    self.assertIn('浙江龙游', locations)
def testBirthday(self):
    # reg.getBirthday must pick the birth date out of a suspect notice.
    notice = "犯罪嫌疑人王海涛,男,汉族,1978年2月10日出生,身份证号码120109197802105034,户籍地:天津市滨海新区大港太平镇六间房村。"
    birthdays = reg.getBirthday(notice)
    print(birthdays)
    self.assertIn("1978年2月10日", birthdays)
def testAppoint2(self):
    # reg.getDisappointment must return the full post text (both titles,
    # comma included) from a removal notice.
    notice = "免去吕国范的河南省水利厅副厅长,河南省人民政府移民工作领导小组办公室主任职务。"
    removed_posts = reg.getDisappointment(notice)
    print(removed_posts)
    self.assertIn("河南省水利厅副厅长,河南省人民政府移民工作领导小组办公室主任", removed_posts)
def testPolitics2(self):
    # Smoke test: print what reg.getPolitics extracts from a form-style
    # line. Nothing is asserted.
    form_line = "政治面貌 中共党员,中共预备党员 入党时间 1996-11-22 民族 汉族 "
    politics = reg.getPolitics(form_line)
    print(politics)
def testMobilePhone(self):
    # reg.getMobilePhone on the str_text16 fixture must contain the number
    # "18515061589".
    phone_numbers = reg.getMobilePhone(self.str_text16)
    print(phone_numbers)
    self.assertIn("18515061589", phone_numbers)
def testFirstDateInJob(self):
    # reg.getFirstDateInJob on the str_text16 fixture must contain
    # "2011年08月".
    first_job_dates = reg.getFirstDateInJob(self.str_text16)
    print(first_job_dates)
    self.assertIn("2011年08月", first_job_dates)
def testAppointmentJob3(self):
    # reg.getAppointment must return the whole post, including the
    # parenthesised alternative name of the organisation.
    notice = '卢镱逢同志任省专家和留学人员服务中心(省留学人员创业服务中心)七级职员'
    appointed_posts = reg.getAppointment(notice)
    print(appointed_posts)
    self.assertIn('省专家和留学人员服务中心(省留学人员创业服务中心)七级职员', appointed_posts)
def testAppointmentJob2(self):
    # reg.getAppointment must extract the post from a nomination sentence.
    notice = '提名梁中基为中国林业集团有限公司副总经理'
    appointed_posts = reg.getAppointment(notice)
    print(appointed_posts)
    self.assertIn('中国林业集团有限公司副总经理', appointed_posts)
def testAppointmentJob1(self):
    # reg.getAppointment must return the whole post, including the
    # parenthesised alternative name of the organisation.
    notice = '杨思雷同志任省职业介绍中心(省就业培训中心)职介科科长'
    appointed_posts = reg.getAppointment(notice)
    print(appointed_posts)
    self.assertIn('省职业介绍中心(省就业培训中心)职介科科长', appointed_posts)
def testDisappointment(self):
    # Smoke test: print what reg.getDisappointment extracts from the
    # str_text15 fixture. Nothing is asserted.
    removed_posts = reg.getDisappointment(self.str_text15)
    print(removed_posts)
def testGetLocation3(self):
    # reg.getLocation must return the full address that follows "户籍地:"
    # in a notice that also carries a fugitive number and a birth date.
    notice = '张任德(在逃人员编号:T8432009999992015090170),男,汉族,1963年12月15日生,户籍地:广西自治区防城港市港口区企沙镇赤沙村赤东组28号'
    locations = reg.getLocation(notice)
    print(locations)
    self.assertIn('广西自治区防城港市港口区企沙镇赤沙村赤东组28号', locations)
def testGetLocation2(self):
    # reg.getLocation must return the address after "户籍地:" without the
    # trailing full stop.
    notice = '户籍地:天津市滨海新区大港太平镇六间房村。'
    locations = reg.getLocation(notice)
    print(locations)
    self.assertIn('天津市滨海新区大港太平镇六间房村', locations)
def testHouseHold(self):
    # reg.getRegisterHouseHold on the str_text16 fixture must contain
    # "河南省濮阳县".
    households = reg.getRegisterHouseHold(self.str_text16)
    print(households)
    self.assertIn("河南省濮阳县", households)
def testNativeHouseHold(self):
    # reg.getNativeHouseHold on the str_text16 fixture must contain
    # "河南省濮阳县".
    native_places = reg.getNativeHouseHold(self.str_text16)
    print(native_places)
    self.assertIn("河南省濮阳县", native_places)
def testPersonName2(self):
    # reg.getPersonName must read the name from a "姓名:" form field.
    form_line = "姓名:逯长松 性别: 男 照片 "
    names = reg.getPersonName(form_line)
    print(names)
    self.assertIn("逯长松", names)
def testLinkAddress(self):
    # reg.getLinkAddress on the str_text16 fixture must contain
    # "朝阳区康营小区".
    addresses = reg.getLinkAddress(self.str_text16)
    print(addresses)
    self.assertIn("朝阳区康营小区", addresses)
def testPersonName3(self):
    # reg.getPersonName must return the name that follows the
    # "犯罪嫌疑人" prefix.
    phrase = "犯罪嫌疑人王海涛"
    names = reg.getPersonName(phrase)
    print(names)
    self.assertIn("王海涛", names)
def testGetDateStage(self):
    # reg.getDateStage must return the leading "start end" date pair of an
    # education-history row.
    education_row = "2004/9 2008/6 江汉大学 计算机科学与技术 本科 学士学位 是"
    date_stages = reg.getDateStage(education_row)
    print(date_stages)
    self.assertIn("2004/9 2008/6", date_stages)
def testPersonName4(self):
    # reg.getPersonName must return the name that opens a comma-separated
    # personal description.
    description = '陈春友,男,汉族,1978年3月7日出生'
    names = reg.getPersonName(description)
    print(names)
    self.assertIn('陈春友', names)
def testAppoint1(self):
    # reg.getDisappointment must return the post from a removal notice
    # whose subject carries a "(女)" gender marker.
    notice = "免去王红(女)的河南省公共资源交易中心主任职务。"
    removed_posts = reg.getDisappointment(notice)
    print(removed_posts)
    self.assertIn("河南省公共资源交易中心主任", removed_posts)
def testPersonName5(self):
    # reg.getPersonName must return the real name from a description that
    # also contains a nickname.
    description = '金彩霞,绰号“金婉婉”,女,汉族,1998年5月14日出生,广东口音,户籍地:广东省徐闻县迈陈镇金宅村65号,身份证号码:440825199805143468。'
    names = reg.getPersonName(description)
    print(names)
    self.assertIn('金彩霞', names)
def testAppoint3(self):
    # reg.getAppointment must return the post without the trailing
    # "(试用期一年)" probation remark.
    notice = "任命党培红(女)为河南省民政厅副厅长(试用期一年);"
    appointed_posts = reg.getAppointment(notice)
    print(appointed_posts)
    self.assertIn("河南省民政厅副厅长", appointed_posts)
def testPersonName6(self):
    # reg.getPersonName must return the name that precedes a parenthesised
    # fugitive number.
    notice = '张任德(在逃人员编号:T8432009999992015090170),男,汉族,1963年12月15日生,户籍地:广西自治区防城港市港口区企沙镇赤沙村赤东组28号'
    names = reg.getPersonName(notice)
    print(names)
    self.assertIn('张任德', names)
def firstInStrList(_list=None):
    """Return the first element of ``_list``, or ``""`` when there is none.

    Args:
        _list: A sequence of extracted values (the script below passes the
            results of ``RegUtils`` extractors). ``None`` is treated like
            an empty sequence.

    Returns:
        ``_list[0]`` when the sequence is non-empty, otherwise ``""``.
    """
    # A ``None`` default avoids sharing one mutable list object across calls.
    if _list is None or len(_list) == 0:
        return ""
    return _list[0]


if __name__ == "__main__":
    # NOTE(review): html_text, html_url, npt, UrlUtils and RegUtils are not
    # defined in this block; presumably they come from earlier in the file.
    resume_dict = {}
    dict_result = npt.analy_html(html_text, UrlUtils.parseUrl(html_url))
    arr_resume = dict_result['resume']
    arr_resume_dict = []
    for resume in arr_resume:
        text = resume['text']
        # Each extractor returns a list; only its first hit (or "") is kept.
        resume['name'] = getPersonName(text)
        resume['alias'] = firstInStrList(RegUtils.getAliasName(text))
        resume['location'] = firstInStrList(RegUtils.getLocation(text))
        resume['nid'] = firstInStrList(RegUtils.getIdNbr(text))
        resume['event_descr'] = firstInStrList(RegUtils.getEventDescrs(text))
        resume['birthday'] = firstInStrList(RegUtils.getBirthday(text))
        resume['volk'] = firstInStrList(RegUtils.getVolk(text))
        resume['gender'] = firstInStrList(RegUtils.getGender(text))
        # Keep only entries identified by a name or an ID number.
        if resume['name'] != '' or resume['nid'] != '':
            arr_resume_dict.append(resume)
    arr_resume_dict = npt.clean_image_url(arr_resume_dict)
    print(arr_resume_dict)
def testPolitics(self):
    # reg.getPolitics on the str_text16 fixture must contain "党员".
    politics = reg.getPolitics(self.str_text16)
    print(politics)
    self.assertIn("党员", politics)
import myconfig.conf as config
import nbtest.utils.ArrayUtils as arrUtils
import nbtest.utils.RegUtils as regUtils
import nbtest.utils.NlpUtils as nlpUtils

if __name__ == "__main__":
    # Script: analyse one HTML page and store the extracted fields as a
    # single row in aml_cnnnews_crawl.
    # NOTE(review): pymysql, npt and html_text are not defined in this
    # block; presumably they come from earlier in the file.
    db = pymysql.connect(host=config.databaseip,
                         user=config.databaseuser,
                         password=config.databasepasswd,
                         database=config.databasename)
    try:
        cursor = db.cursor()
        dict_result = npt.analy_html(html_text)
        resume_text = arrUtils.listToString(dict_result['resume'])
        dict_result['reason'] = arrUtils.listToString(dict_result['reason'])
        dict_result['result'] = arrUtils.listToString(dict_result['result'])
        dict_resume = regUtils.analyToResumeDict(resume_text)
        dict_result['name'] = arrUtils.listToString(
            nlpUtils.get_per_list(resume_text))
        dict_result.update(dict_resume)

        # The values come from crawled text, so they are passed as query
        # parameters and escaped by the driver instead of being spliced
        # into the SQL string (a quote in the text would otherwise break
        # or alter the statement).
        insert_sql = (
            "insert into aml_cnnnews_crawl "
            "(name, volk, education, job, major, school, location, "
            "punish_reason , punish_result) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) "
        )
        column_keys = ('name', 'volk', 'edu', 'job', 'majors', 'schools',
                       'location', 'reason', 'result')
        # str() mirrors the conversion the former '%s' string formatting
        # applied to each value, so the stored text stays the same.
        row_values = tuple(str(dict_result[key]) for key in column_keys)
        cursor.execute(insert_sql, row_values)
        db.commit()
    finally:
        # Release the connection even when analysis or the insert fails.
        db.close()
def testMarriage(self):
    # reg.getMarriage on the str_text16 fixture must contain "未婚".
    marital_statuses = reg.getMarriage(self.str_text16)
    print(marital_statuses)
    self.assertIn("未婚", marital_statuses)