def getPersonName(text=""):
    """Return the first person name found in ``text``, or ``""`` if none.

    ``RegUtils.getPersonName`` is consulted first; ``NlpUtils.get_per_list``
    is only called when the first lookup yields an empty result.

    Args:
        text: The text to search.

    Returns:
        The first element of whichever lookup produced a non-empty result,
        or an empty string when both come back empty.
    """
    names = RegUtils.getPersonName(text)
    if len(names) > 0:
        return names[0]
    # Bind the fallback result: it only helps if its output is what gets
    # checked, otherwise the empty first result is tested a second time.
    names = NlpUtils.get_per_list(text)
    if len(names) > 0:
        return names[0]
    return ""
def getLinkAddress(text=""):
    """Extract the entries that follow a "通讯地址" label in ``text``.

    Candidates come from ``extrRegEntity`` (capture index 3 of the label
    pattern). They are joined with ``arrUtils.listToString`` and handed to
    ``nlpUtils.get_loc_list``; the candidates are returned only when that
    call yields a non-empty result.

    Args:
        text: The text to search.

    Returns:
        The ``extrRegEntity`` result when the location check finds
        something, otherwise an empty list.
    """
    pattern = r"(通讯地址)(:|:)?( )*([\u4e00-\u9fa5]+)"
    candidates = extrRegEntity(text, pattern, index=[3], keywords=["通讯地址"])
    joined = arrUtils.listToString(candidates)
    locations = nlpUtils.get_loc_list(joined)
    # The location list acts purely as a validity gate; the regex matches
    # themselves are what the caller receives.
    return candidates if len(locations) > 0 else []
def getNativeHouseHold(text=""):
    """Extract the entries that follow a "籍贯" label in ``text``.

    The label pattern is applied through ``extrRegEntity`` (capture index 3).
    The matches are accepted only if ``nlpUtils.get_loc_list`` returns a
    non-empty result for their ``arrUtils.listToString`` form.

    Args:
        text: The text to search.

    Returns:
        The ``extrRegEntity`` result when the location check finds
        something, otherwise an empty list.
    """
    label = "籍贯"
    matches = extrRegEntity(
        text,
        r"(籍贯)(:|:)?( )*([\u4e00-\u9fa5]+)",
        index=[3],
        keywords=[label],
    )
    recognised = nlpUtils.get_loc_list(arrUtils.listToString(matches))
    if len(recognised) == 0:
        # Nothing in the matches was recognised as a location: reject them.
        return []
    return matches
def test_person_name(self):
    """Print what ``nlp.get_per_list`` returns for a fixed sample sentence."""
    sample = '各处室、直属单位:经研究决定:杨思雷同志任省职业介绍中心(省就业培训中心)职介科科长;陶尚同志任省职业介绍中心(省就业培训中心)代理科科长。'
    # No assertion is made; the output is only printed for manual inspection.
    print(nlp.get_per_list(sample))
def test_loc_name(self):
    """Print what ``nlp.get_loc_list`` returns for a fixed sample address."""
    sample = '上海市松江区九亭镇虬泾路118号'
    # No assertion is made; the output is only printed for manual inspection.
    print(nlp.get_loc_list(sample))
import myconfig.conf as config
import nbtest.utils.ArrayUtils as arrUtils
import nbtest.utils.RegUtils as regUtils
import nbtest.utils.NlpUtils as nlpUtils

if __name__ == "__main__":
    # Analyse one HTML page, derive the resume fields from it and store a
    # single row in ``aml_cnnnews_crawl``.
    # NOTE(review): ``pymysql``, ``npt`` and ``html_text`` are used below but
    # are not imported/defined in this part of the file - confirm they are
    # provided elsewhere.
    insert_sql = (
        "insert into aml_cnnnews_crawl (name, volk, education, job, major, "
        "school, location, punish_reason , punish_result) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    # dict_result keys, in the same order as the columns of insert_sql.
    value_keys = ('name', 'volk', 'edu', 'job', 'majors', 'schools',
                  'location', 'reason', 'result')

    db = pymysql.connect(host=config.databaseip, user=config.databaseuser,
                         password=config.databasepasswd,
                         database=config.databasename)
    try:
        cursor = db.cursor()

        dict_result = npt.analy_html(html_text)
        resume_text = arrUtils.listToString(dict_result['resume'])
        dict_result['reason'] = arrUtils.listToString(dict_result['reason'])
        dict_result['result'] = arrUtils.listToString(dict_result['result'])

        dict_resume = regUtils.analyToResumeDict(resume_text)
        dict_result['name'] = arrUtils.listToString(
            nlpUtils.get_per_list(resume_text))
        # Applied after 'name' is set, so a 'name' key coming from the resume
        # dict takes precedence over the one computed above.
        dict_result.update(dict_resume)

        # The values are passed as query parameters so the driver escapes
        # them: text taken from a crawled page may contain quotes, which
        # would break (or inject into) a statement built by string
        # formatting. str() keeps the textual form '%s' formatting produced.
        params = tuple(str(dict_result[key]) for key in value_keys)
        cursor.execute(insert_sql, params)
        db.commit()
    finally:
        # Release the connection even when parsing or the insert fails.
        db.close()
import myconfig.conf as config
import re
import pymysql

if __name__ == "__main__":
    # Split every 'appoint' text of the analysed page into clauses and print
    # the appointment / dismissal / person-name fields found in each clause.
    # NOTE: database persistence is currently disabled. The rows were meant
    # to be inserted into aml_cnnnews_crawl (name, appointment,
    # disappointment) through a pymysql connection built from ``config``.
    # NOTE(review): ``npt``, ``html_text``, ``arrUtils``, ``regUtils`` and
    # ``nlpUtils`` are not imported/defined in this part of the file -
    # confirm they are provided elsewhere.
    clause_separator = re.compile(r";|;|。")

    arr_person_resume = []
    dict_result = npt.analy_html(html_text)

    for appoint_texts in dict_result['appoint']:
        for appoint_text in clause_separator.split(appoint_texts):
            appointed = arrUtils.listToString(
                regUtils.getAppointment(appoint_text))
            dismissed = arrUtils.listToString(
                regUtils.getDisappointment(appoint_text))
            names = arrUtils.listToString(
                arrUtils.flatListToStr(nlpUtils.get_per_list(appoint_text)),
                ',')

            # Keep only clauses that mention an appointment or a dismissal.
            if appointed != '' or dismissed != '':
                dict_resume = {
                    'appoint': appointed,
                    'disappoint': dismissed,
                    'name': names,
                }
                arr_person_resume.append(dict_resume)
                print(dict_resume)

    print(dict_result)