Beispiel #1
0
def getPersonName(text=""):
    """Return the first person name extracted from ``text``.

    ``RegUtils.getPersonName`` is tried first. Only when it yields nothing
    is ``NlpUtils.get_per_list`` consulted as a fallback.

    Args:
        text: The text to search for a person name.

    Returns:
        The first name found, or ``""`` when neither extractor finds one.
    """
    names = RegUtils.getPersonName(text)
    if not names:
        # The fallback result must be captured; the previous code discarded
        # it and re-checked the (empty) primary result instead.
        names = NlpUtils.get_per_list(text)
    return names[0] if names else ""
Beispiel #2
0
 def testGetJobs3(self):
     # Every title below must be present in what reg.getJobsCurr returns
     # for the str_text12 fixture.
     jobs = reg.getJobsCurr(self.str_text12)
     print(jobs)
     expected_titles = (
         "中共十九届中央委员",
         "国务委员",
         "公安部部长",
         "党委书记",
         "总警监",
         "国务院党组成员",
     )
     for title in expected_titles:
         self.assertIn(title, jobs)
Beispiel #3
0
 def testResume(self):
     # Smoke test: runs the extractors over the str_text fixture and prints
     # the collected results. Nothing is asserted.
     resume = {
         'volk': reg.getVolk(self.str_text),
         'edu': reg.getEducations(self.str_text),
         'alias': reg.getAliasName(self.str_text),
         'job': reg.getJobsCurr(self.str_text),
         'majors': reg.getMajors(self.str_text),
         'schools': reg.getSchools(self.str_text),
         'location': reg.getLocation(self.str_text),
     }
     print(resume)
Beispiel #4
0
 def testGetLocation(self):
     # reg.getLocation on the str_text7 fixture must yield '浙江龙游'.
     locations = reg.getLocation(self.str_text7)
     print(locations)
     self.assertIn('浙江龙游', locations)
Beispiel #5
0
 def testBirthday(self):
     # The birth date written in the sentence must be among the results of
     # reg.getBirthday.
     sentence = "犯罪嫌疑人王海涛,男,汉族,1978年2月10日出生,身份证号码120109197802105034,户籍地:天津市滨海新区大港太平镇六间房村。"
     birthdays = reg.getBirthday(sentence)
     print(birthdays)
     self.assertIn("1978年2月10日", birthdays)
Beispiel #6
0
 def testAppoint2(self):
     # reg.getDisappointment must return the full comma-joined pair of
     # positions named in the dismissal sentence.
     sentence = "免去吕国范的河南省水利厅副厅长,河南省人民政府移民工作领导小组办公室主任职务。"
     removed_posts = reg.getDisappointment(sentence)
     print(removed_posts)
     self.assertIn("河南省水利厅副厅长,河南省人民政府移民工作领导小组办公室主任", removed_posts)
Beispiel #7
0
 def testPolitics2(self):
     # Smoke test: prints what reg.getPolitics returns for a form-style
     # line. Nothing is asserted.
     form_line = "政治面貌 中共党员,中共预备党员 入党时间 1996-11-22 民族 汉族 "
     politics = reg.getPolitics(form_line)
     print(politics)
Beispiel #8
0
 def testMobilePhone(self):
     # reg.getMobilePhone on the str_text16 fixture must yield this number.
     phone_numbers = reg.getMobilePhone(self.str_text16)
     print(phone_numbers)
     self.assertIn("18515061589", phone_numbers)
Beispiel #9
0
 def testFirstDateInJob(self):
     # reg.getFirstDateInJob on the str_text16 fixture must yield "2011年08月".
     start_dates = reg.getFirstDateInJob(self.str_text16)
     print(start_dates)
     self.assertIn("2011年08月", start_dates)
Beispiel #10
0
 def testAppointmentJob3(self):
     # The position following "同志任" (including the full-width
     # parenthesised part) must be among the reg.getAppointment results.
     sentence = '卢镱逢同志任省专家和留学人员服务中心(省留学人员创业服务中心)七级职员'
     appointments = reg.getAppointment(sentence)
     print(appointments)
     self.assertIn('省专家和留学人员服务中心(省留学人员创业服务中心)七级职员', appointments)
Beispiel #11
0
 def testAppointmentJob2(self):
     # For a "提名…为…" sentence, the position after "为" must be among the
     # reg.getAppointment results.
     sentence = '提名梁中基为中国林业集团有限公司副总经理'
     appointments = reg.getAppointment(sentence)
     print(appointments)
     self.assertIn('中国林业集团有限公司副总经理', appointments)
Beispiel #12
0
 def testAppointmentJob1(self):
     # The position following "同志任" must be among the
     # reg.getAppointment results.
     sentence = '杨思雷同志任省职业介绍中心(省就业培训中心)职介科科长'
     appointments = reg.getAppointment(sentence)
     print(appointments)
     self.assertIn('省职业介绍中心(省就业培训中心)职介科科长', appointments)
Beispiel #13
0
 def testDisappointment(self):
     # Smoke test: prints what reg.getDisappointment returns for the
     # str_text15 fixture. Nothing is asserted.
     removed_posts = reg.getDisappointment(self.str_text15)
     print(removed_posts)
Beispiel #14
0
 def testGetLocation3(self):
     # The address after "户籍地:" must be among the reg.getLocation results,
     # even though the sentence starts with a parenthesised fugitive number.
     sentence = '张任德(在逃人员编号:T8432009999992015090170),男,汉族,1963年12月15日生,户籍地:广西自治区防城港市港口区企沙镇赤沙村赤东组28号'
     locations = reg.getLocation(sentence)
     print(locations)
     self.assertIn('广西自治区防城港市港口区企沙镇赤沙村赤东组28号', locations)
Beispiel #15
0
 def testGetLocation2(self):
     # The address after "户籍地:" must be returned without the trailing "。".
     sentence = '户籍地:天津市滨海新区大港太平镇六间房村。'
     locations = reg.getLocation(sentence)
     print(locations)
     self.assertIn('天津市滨海新区大港太平镇六间房村', locations)
Beispiel #16
0
 def testHouseHold(self):
     # reg.getRegisterHouseHold on the str_text16 fixture must yield
     # "河南省濮阳县".
     households = reg.getRegisterHouseHold(self.str_text16)
     print(households)
     self.assertIn("河南省濮阳县", households)
Beispiel #17
0
 def testNativeHouseHold(self):
     # reg.getNativeHouseHold on the str_text16 fixture must yield
     # "河南省濮阳县".
     native_places = reg.getNativeHouseHold(self.str_text16)
     print(native_places)
     self.assertIn("河南省濮阳县", native_places)
Beispiel #18
0
 def testPersonName2(self):
     # The name after the "姓名:" label must be among the
     # reg.getPersonName results.
     form_line = "姓名:逯长松 性别: 男 照片 "
     names = reg.getPersonName(form_line)
     print(names)
     self.assertIn("逯长松", names)
Beispiel #19
0
 def testLinkAddress(self):
     # reg.getLinkAddress on the str_text16 fixture must yield
     # "朝阳区康营小区".
     addresses = reg.getLinkAddress(self.str_text16)
     print(addresses)
     self.assertIn("朝阳区康营小区", addresses)
Beispiel #20
0
 def testPersonName3(self):
     # The name following the "犯罪嫌疑人" prefix must be among the
     # reg.getPersonName results.
     phrase = "犯罪嫌疑人王海涛"
     names = reg.getPersonName(phrase)
     print(names)
     self.assertIn("王海涛", names)
Beispiel #21
0
 def testGetDateStage(self):
     # The leading "start end" date pair of the education row must be among
     # the reg.getDateStage results.
     education_row = "2004/9 2008/6 江汉大学 计算机科学与技术 本科 学士学位 是"
     date_stages = reg.getDateStage(education_row)
     print(date_stages)
     self.assertIn("2004/9 2008/6", date_stages)
Beispiel #22
0
 def testPersonName4(self):
     # The name that opens the comma-separated description must be among
     # the reg.getPersonName results.
     description = '陈春友,男,汉族,1978年3月7日出生'
     names = reg.getPersonName(description)
     print(names)
     self.assertIn('陈春友', names)
Beispiel #23
0
 def testAppoint1(self):
     # The position named in the dismissal sentence must be among the
     # reg.getDisappointment results.
     sentence = "免去王红(女)的河南省公共资源交易中心主任职务。"
     removed_posts = reg.getDisappointment(sentence)
     print(removed_posts)
     self.assertIn("河南省公共资源交易中心主任", removed_posts)
Beispiel #24
0
 def testPersonName5(self):
     # The real name (not the quoted nickname) that opens the description
     # must be among the reg.getPersonName results.
     description = '金彩霞,绰号“金婉婉”,女,汉族,1998年5月14日出生,广东口音,户籍地:广东省徐闻县迈陈镇金宅村65号,身份证号码:440825199805143468。'
     names = reg.getPersonName(description)
     print(names)
     self.assertIn('金彩霞', names)
Beispiel #25
0
 def testAppoint3(self):
     # The position after "为" must be among the reg.getAppointment results,
     # without the trailing "(试用期一年)" remark.
     sentence = "任命党培红(女)为河南省民政厅副厅长(试用期一年);"
     appointments = reg.getAppointment(sentence)
     print(appointments)
     self.assertIn("河南省民政厅副厅长", appointments)
Beispiel #26
0
 def testPersonName6(self):
     # The name preceding the parenthesised fugitive number must be among
     # the reg.getPersonName results.
     description = '张任德(在逃人员编号:T8432009999992015090170),男,汉族,1963年12月15日生,户籍地:广西自治区防城港市港口区企沙镇赤沙村赤东组28号'
     names = reg.getPersonName(description)
     print(names)
     self.assertIn('张任德', names)
Beispiel #27
0
def firstInStrList(_list=None):
    """Return the first element of ``_list``, or ``""`` if there is none.

    Args:
        _list: A sequence of values. ``None`` is treated like an empty
            sequence, so a missing extractor result no longer raises.

    Returns:
        ``_list[0]`` when the sequence is non-empty, otherwise ``""``.
    """
    # None replaces the former shared mutable default ``[]``.
    if _list is None or len(_list) == 0:
        return ""
    return _list[0]


if __name__ == "__main__":
    # Script entry point: analyse the page, enrich every resume entry with
    # the extracted fields and print the entries that could be identified.
    # NOTE(review): `npt`, `html_text` and `html_url` are not defined in the
    # visible part of this file; they must be provided elsewhere.
    dict_result = npt.analy_html(html_text, UrlUtils.parseUrl(html_url))
    arr_resume = dict_result['resume']

    arr_resume_dict = []
    for resume in arr_resume:
        # Read the source text once; every extractor works on the same text.
        text = resume['text']

        resume['name'] = getPersonName(text)
        resume['alias'] = firstInStrList(RegUtils.getAliasName(text))
        resume['location'] = firstInStrList(RegUtils.getLocation(text))
        resume['nid'] = firstInStrList(RegUtils.getIdNbr(text))
        resume['event_descr'] = firstInStrList(RegUtils.getEventDescrs(text))
        resume['birthday'] = firstInStrList(RegUtils.getBirthday(text))
        resume['volk'] = firstInStrList(RegUtils.getVolk(text))
        resume['gender'] = firstInStrList(RegUtils.getGender(text))

        # Keep only entries that carry a name or an ID number.
        if resume['name'] != '' or resume['nid'] != '':
            arr_resume_dict.append(resume)

    arr_resume_dict = npt.clean_image_url(arr_resume_dict)
    print(arr_resume_dict)
Beispiel #28
0
 def testPolitics(self):
     # reg.getPolitics on the str_text16 fixture must yield "党员".
     politics = reg.getPolitics(self.str_text16)
     print(politics)
     self.assertIn("党员", politics)
Beispiel #29
0
import myconfig.conf as config
import nbtest.utils.ArrayUtils as arrUtils
import nbtest.utils.RegUtils as regUtils
import nbtest.utils.NlpUtils as nlpUtils

if __name__ == "__main__":
    # Script entry point: analyse the crawled page and store one row in the
    # aml_cnnnews_crawl table.
    # NOTE(review): `pymysql`, `npt` and `html_text` are not imported or
    # defined in the visible part of this file; they must be provided
    # elsewhere.
    db = pymysql.connect(host=config.databaseip,
                         user=config.databaseuser,
                         password=config.databasepasswd,
                         database=config.databasename)
    try:
        dict_result = npt.analy_html(html_text)
        resume_text = arrUtils.listToString(dict_result['resume'])

        dict_result['reason'] = arrUtils.listToString(dict_result['reason'])
        dict_result['result'] = arrUtils.listToString(dict_result['result'])
        dict_resume = regUtils.analyToResumeDict(resume_text)
        dict_result['name'] = arrUtils.listToString(
            nlpUtils.get_per_list(resume_text))
        dict_result.update(dict_resume)

        # Placeholders are bound by the driver instead of being interpolated
        # into the SQL text, so quotes in the crawled data can neither break
        # the statement nor inject SQL.
        insert_sql = (
            "insert into aml_cnnnews_crawl (name, volk, education, job, major, school, location, punish_reason , punish_result) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) ")
        value_keys = ('name', 'volk', 'edu', 'job', 'majors', 'schools',
                      'location', 'reason', 'result')
        # str() keeps the text that the former '%s' interpolation stored for
        # each value, whatever its type.
        insert_args = tuple(str(dict_result[key]) for key in value_keys)

        with db.cursor() as cursor:
            cursor.execute(insert_sql, insert_args)
        db.commit()
    finally:
        # Release the connection even when analysis or the insert fails.
        db.close()
Beispiel #30
0
 def testMarriage(self):
     # reg.getMarriage on the str_text16 fixture must yield "未婚".
     marital_statuses = reg.getMarriage(self.str_text16)
     print(marital_statuses)
     self.assertIn("未婚", marital_statuses)