def __init__(self):
    """Declare the three CV parsers."""
    parser_factories = (
        ("cv_parser_58", CvParser58),
        ("cv_parser_51job", CvParser51Job),
        ("cv_parser_zhilian", CvParserZhiLian),
    )
    # Instantiate in the listed order and bind each parser to its attribute.
    for attr_name, make_parser in parser_factories:
        setattr(self, attr_name, make_parser())
class CvParser:
    """HTML parsing front-end for CVs from 58, 51job and zhilian.

    Holds one site-specific parser per source and forwards each request to
    the parser whose name appears in the ``cvFrom`` label.
    """

    def __init__(self):
        """Declare the three site-specific CV parsers."""
        self.cv_parser_58 = CvParser58()
        self.cv_parser_51job = CvParser51Job()
        self.cv_parser_zhilian = CvParserZhiLian()

    def parser(self, htmlContent=None, fname=None, url=None, cvFrom="lagou"):
        """Parse a CV with the parser selected by ``cvFrom``.

        Args:
            htmlContent: Passed through unchanged to the selected parser.
            fname: Passed through unchanged to the selected parser.
            url: Passed through unchanged to the selected parser.
            cvFrom: Source label. The first of ``"51job"``, ``"58"`` and
                ``"zhilian"`` found anywhere in it picks the parser.

        Returns:
            Whatever the selected parser returns, or an empty dict when
            ``cvFrom`` is ``None``, empty, or names no known source (which
            includes the default ``"lagou"``).
        """
        # re.search() raises TypeError on None; treat a missing label the
        # same way as any other unrecognised source instead of crashing.
        if not cvFrom:
            return {}

        if re.search(u"51job", cvFrom):
            return self.cv_parser_51job.parser(htmlContent, fname, url)
        if re.search(u"58", cvFrom):
            return self.cv_parser_58.parser(htmlContent, fname, url)
        if re.search(u"zhilian", cvFrom):
            return self.cv_parser_zhilian.parser(htmlContent, fname, url)
        return {}
def __init__(self):
    """Declare the five CV parsers."""
    parser_factories = (
        ("cv_parser_58", CvParser58),
        ("cv_parser_51job", CvParser51Job),
        ("cv_parser_zhilian", CvParserZhiLian),
        ("cv_parser_liepin", CvParserLiepin),
        ("cv_parser_highpin", CvParserHighPin),
    )
    # Instantiate in the listed order and bind each parser to its attribute.
    for attr_name, make_parser in parser_factories:
        setattr(self, attr_name, make_parser())
class CvHandler(object):
    """Parses CV HTML from 58, 51job or zhilian into a ``CvRaw`` object."""

    def __init__(self):
        # Initialise the parsers for the three sites.
        self.cv_parser_zhilian = CvParserZhiLian()
        self.cv_parser_51job = CvParser51Job()
        self.cv_parser_58 = CvParser58()

    def parseHtml(self, htmlContent=None, cvFrom=None):
        """Parse CV HTML and wrap every section in its raw record type.

        Args:
            htmlContent: The input HTML source, handed to the selected
                site parser.
            cvFrom: Source label; must contain one of ``"51job"``,
                ``"zhilian"`` or ``"58"``.

        Returns:
            A ``CvRaw`` built from the converted sections.

        Raises:
            TypeError: If ``cvFrom`` is ``None`` (raised by ``re.search``).
            KeyError: If ``cvFrom`` matches no known source, so there is
                nothing to convert, or if the parser output lacks one of
                the expected sections.
        """
        result = dict()
        if re.search(u"51job", cvFrom):
            result = self.cv_parser_51job.parser(htmlContent)
        elif re.search(u"zhilian", cvFrom):
            result = self.cv_parser_zhilian.parser(htmlContent)
        elif re.search(u"58", cvFrom):
            result = self.cv_parser_58.parser(htmlContent)

        # cvId, cvUrl and cvFrom are filled in later, as needed by whoever
        # processes the result.
        result["baseInfo"] = CvBaseInfoRaw(**result["baseInfo"])
        result["privateInfo"] = CvPrivateInfoRaw(**result["privateInfo"])
        result["jobExp"] = CvJobExpRaw(**result["jobExp"])
        result["eduList"] = [CvEduItemRaw(**x) for x in result["eduList"]]
        result["jobList"] = [CvJobItemRaw(**x) for x in result["jobList"]]
        result["proList"] = [CvProItemRaw(**x) for x in result["proList"]]
        result["certList"] = [CvCertItemRaw(**x) for x in result["certList"]]
        result["trainList"] = [CvTrainItemRaw(**x) for x in result["trainList"]]
        result["languageList"] = [CvLanguageItemRaw(**x) for x in result["languageList"]]
        result["skillList"] = [CvSkillItemRaw(**x) for x in result["skillList"]]

        return CvRaw(**result)
class CvHandler(object):
    """Parses CV HTML from zhilian, 51job or 58 into a ``CvRaw`` object."""

    def __init__(self):
        # Initialise the parsers for the three sites.
        self.cv_parser_zhilian = CvParserZhiLian()
        self.cv_parser_51job = CvParser51Job()
        self.cv_parser_58 = CvParser58()

    def parseHtml(self, htmlContent=None, cvFrom=None):
        """Parse CV HTML and wrap every section in its raw record type.

        htmlContent: the input HTML source.
        cvFrom: source label containing one of 58, 51job or zhilian.

        Returns a ``CvRaw`` built from the converted sections.
        """
        # Source patterns are tried in this order; the first hit wins.
        source_table = (
            (u"zhilian", "cv_parser_zhilian"),
            (u"51job", "cv_parser_51job"),
            (u"58", "cv_parser_58"),
        )
        parsed = dict()
        for pattern, parser_attr in source_table:
            if re.search(pattern, cvFrom):
                parsed = getattr(self, parser_attr).parser(htmlContent)
                break

        # Sections holding a single mapping become one record each.
        single_sections = (
            ("baseInfo", CvBaseInfoRaw),
            ("privateInfo", CvPrivateInfoRaw),
            ("jobExp", CvJobExpRaw),
        )
        for section, record_type in single_sections:
            parsed[section] = record_type(**parsed[section])

        # Sections holding a list of mappings become a list of records.
        list_sections = (
            ("eduList", CvEduItemRaw),
            ("jobList", CvJobItemRaw),
            ("proList", CvProItemRaw),
            ("certList", CvCertItemRaw),
            ("trainList", CvTrainItemRaw),
            ("languageList", CvLanguageItemRaw),
            ("skillList", CvSkillItemRaw),
        )
        for section, item_type in list_sections:
            parsed[section] = [item_type(**entry) for entry in parsed[section]]

        return CvRaw(**parsed)
def __init__(self):
    # Initialise the parsers for the three sites, in the listed order.
    parser_factories = (
        ("cv_parser_zhilian", CvParserZhiLian),
        ("cv_parser_51job", CvParser51Job),
        ("cv_parser_58", CvParser58),
    )
    for attr_name, make_parser in parser_factories:
        setattr(self, attr_name, make_parser())