Esempio n. 1
0
class JDHandler(object):
    """Wrap a ``JdParser`` and convert its dict output into ``JdRaw`` objects.

    NOTE(review): "JD" presumably stands for job description -- confirm.
    """

    def __init__(self):
        # A single parser instance is used for every source site; the site is
        # selected per call through the ``jdFrom`` argument of ``analyzeHtml``.
        self.jd_parser = JdParser()

    def analyzeHtml(self, htmlContent=None, jdFrom=None):
        """Parse one HTML page and return the result as a ``JdRaw``.

        Args:
            htmlContent: HTML source of the page to parse; passed to the
                parser unchanged.
            jdFrom: Identifier of the site the page came from. The original
                documentation lists ``lagou``, ``51job``, ``zhilian`` and
                ``liepin`` as the accepted values.

        Returns:
            ``JdRaw`` built from the parser result, with its ``"jdInc"`` and
            ``"jdJob"`` entries wrapped in ``JdIncRaw`` and ``JdJobRaw``.

        Raises:
            ValueError: If ``jdFrom`` is missing or empty.
            NamedError: If the parser raises any ``Exception``; carries the
                text of the original error.
        """
        if not jdFrom:
            raise ValueError("jdFrom invalid")

        try:
            # detail=False requests the basic parse, detail=True the detailed one.
            result = self.jd_parser.parser(htmlContent, jdFrom=jdFrom, detail=True)
        except Exception as e:
            # ``e.message`` was removed in Python 3 and is empty on Python 2
            # for multi-argument exceptions; fall back to ``str(e)`` so the
            # parser failure is reported instead of being masked.
            raise NamedError(getattr(e, "message", None) or str(e))

        # Read both sub-dicts before touching ``result`` so a missing key
        # leaves the parser output unmodified.
        result_inc = result["jdInc"]
        result_job = result["jdJob"]

        # "jdId" and "jdUrl" are not set here; callers fill them in when needed.
        result["jdInc"] = JdIncRaw(**result_inc)
        result["jdJob"] = JdJobRaw(**result_job)

        return JdRaw(**result)
Esempio n. 2
0
class JDHandler(object):
    """Wrap a ``JdParser`` and convert its dict output into ``JdRaw`` objects.

    NOTE(review): "JD" presumably stands for job description -- confirm.
    """

    def __init__(self):
        # A single parser instance is used for every source site; the site is
        # selected per call through the ``jdFrom`` argument of ``analyzeHtml``.
        self.jd_parser = JdParser()

    def analyzeHtml(self, htmlContent=None, jdFrom=None):
        """Parse one HTML page and return the result as a ``JdRaw``.

        Args:
            htmlContent: HTML source of the page to parse; passed to the
                parser unchanged.
            jdFrom: Identifier of the site the page came from. The original
                documentation lists ``lagou``, ``51job``, ``zhilian`` and
                ``liepin`` as the accepted values.

        Returns:
            ``JdRaw`` built from the parser result, with its ``"jdInc"`` and
            ``"jdJob"`` entries wrapped in ``JdIncRaw`` and ``JdJobRaw``.

        Raises:
            ValueError: If ``jdFrom`` is missing or empty.
            NamedError: If the parser raises any ``Exception``; carries the
                text of the original error.
        """
        if not jdFrom:
            raise ValueError("jdFrom invalid")

        try:
            # detail=False requests the basic parse, detail=True the detailed one.
            result = self.jd_parser.parser(
                htmlContent, jdFrom=jdFrom, detail=True)
        except Exception as e:
            # ``e.message`` was removed in Python 3 and is empty on Python 2
            # for multi-argument exceptions; fall back to ``str(e)`` so the
            # parser failure is reported instead of being masked.
            raise NamedError(getattr(e, "message", None) or str(e))

        # Read both sub-dicts before touching ``result`` so a missing key
        # leaves the parser output unmodified.
        result_inc = result["jdInc"]
        result_job = result["jdJob"]

        # "jdId" and "jdUrl" are not set here; callers fill them in when needed.
        result["jdInc"] = JdIncRaw(**result_inc)
        result["jdJob"] = JdJobRaw(**result_job)

        return JdRaw(**result)