Esempio n. 1
0
def construct_jd_raw(item):
    """Assemble a ``JdRaw`` record from the attributes of ``item``.

    ``jdId``, ``jdFrom`` and ``jdUrl`` are copied over unchanged.  The
    publication date and the company/job fields read from ``item`` are each
    wrapped as ``DtString(value, getFlag(value))``.  Fields that are not
    populated from ``item`` receive a default-constructed ``DtString()``,
    and a ``JdRemedyRaw`` made up entirely of placeholder values is attached.

    Args:
        item: Object exposing the ``jd*``, ``pubDate``, ``inc*`` and ``job*``
            attributes read below.

    Returns:
        The populated ``JdRaw`` instance.
    """

    def flagged(field):
        # Pair the item's value with the flag getFlag computes for it.
        value = getattr(item, field)
        return DtString(value, getFlag(value))

    record = JdRaw()
    for field in ("jdId", "jdFrom", "jdUrl"):
        setattr(record, field, getattr(item, field))
    record.pubDate = flagged("pubDate")

    # Company information.
    company = JdIncRaw()
    for field in ("incName", "incType", "incIntro", "incIndustry", "incScale"):
        setattr(company, field, flagged(field))
    company.incUrl = DtString()  # not needed for quantification

    # Job information.
    job = JdJobRaw()
    for field in (
        "jobPosition",
        "jobSalary",
        "jobWorkLoc",
        "jobDiploma",
        "jobWorkAge",
        "jobCate",
    ):
        setattr(job, field, flagged(field))
    # These job fields are not needed for quantification, so they stay empty.
    for field in ("jobDescription", "jobType", "jobWelfare"):
        setattr(job, field, DtString())

    record.jdJob = job
    record.jdInc = company

    # Remedy information: every entry is a placeholder value.
    remedy = JdRemedyRaw()
    remedy.jdId = ""
    for field in (
        "age",
        "diploma",
        "endTime",
        "gender",
        "incName",
        "jobPosition",
        "jobWelfare",
        "major",
        "pubTime",
        "salary",
        "workDemand",
        "workLoc",
        "workDuty",
        "workExp",
    ):
        setattr(remedy, field, DtString())
    # List-valued entries each hold a single empty DtString.
    for field in ("cert", "incTags", "jobTags", "skills"):
        setattr(remedy, field, [DtString()])

    record.remedyInfo = remedy

    return record
Esempio n. 2
0
def parse(content):
    """Build a ``JdRaw`` tagged ``"jd_zhilian"`` from an HTML document.

    The markup is parsed with ``html.fromstring``; the company and job
    sections are delegated to ``jdinc.parse`` and ``jdjob.parse``.  When the
    publication-date XPath matches, ``pubDate`` is set from the text content
    of the first matching node with a ``hasValue=True`` flag; otherwise
    ``pubDate`` is left as ``JdRaw`` initialised it.

    Args:
        content: HTML markup accepted by ``html.fromstring``.

    Returns:
        The populated ``JdRaw`` instance.
    """
    tree = html.fromstring(content)

    record = JdRaw(jdFrom="jd_zhilian")
    record.jdUrl = ""
    record.jdInc = jdinc.parse(tree)
    record.jdJob = jdjob.parse(tree)

    date_nodes = tree.xpath("//ul[@class='terminal-ul clearfix']/li[3]/strong")
    if not date_nodes:
        return record

    has_value_flag = DataFlag(hasValue=True).toByte()
    record.pubDate = DtString(date_nodes[0].text_content(), has_value_flag)
    return record
Esempio n. 3
0
def parse(content):
    """Build a ``JdRaw`` tagged ``"jd_lagou"`` from an HTML document.

    The markup is parsed with ``html.fromstring`` and the resulting tree is
    handed to ``jdinc.parse`` and ``jdjob.parse`` to fill the company and
    job sections respectively.

    Args:
        content: HTML markup accepted by ``html.fromstring``.

    Returns:
        The populated ``JdRaw`` instance.
    """
    tree = html.fromstring(content)

    record = JdRaw(jdFrom="jd_lagou")
    company_section = jdinc.parse(tree)
    record.jdInc = company_section
    job_section = jdjob.parse(tree)
    record.jdJob = job_section

    return record