예제 #1
0
파일: jdinc.py 프로젝트: haogods/etl_task
def parse(doc):
    """Build a JdIncRaw company record from ``doc``.

    Fields of the returned record:

    - incName: company name
    - incIndustry: industry
    - incType: company type
    - incScale: company size
    - incIntro: company introduction
    - incUrl: company URL

    incName and incUrl are taken from the pair returned by
    ``_get_inc_name_url`` and incIntro from ``_get_inc_intro``. The record
    is then handed to ``_set_inc_others`` together with ``doc``; presumably
    that helper fills in industry, type and size (confirm against its
    definition).
    """
    company = JdIncRaw()

    # The helper yields both values at once; unpack before storing them.
    name, url = _get_inc_name_url(doc)
    company.incName = name
    company.incUrl = url

    company.incIntro = _get_inc_intro(doc)
    _set_inc_others(company, doc)
    return company
예제 #2
0
파일: jdinc.py 프로젝트: haogods/etl_task
def parse(doc):
    """Build a JdIncRaw company record from ``doc``.

    Fields of the returned record:

    - incName: company name
    - incIndustry: industry
    - incType: company type
    - incScale: company size
    - incIntro: company introduction
    - incUrl: company URL

    Each of incName, incType, incIntro and incUrl is assigned from its own
    ``_get_inc_*`` helper. The record is also passed, with ``doc``, to
    ``_set_inc_industry_and_scale``; presumably that helper sets
    incIndustry and incScale (confirm against its definition).
    """
    company = JdIncRaw()
    company.incName = _get_inc_name(doc)

    # Runs right after the name is set, matching the original call order.
    _set_inc_industry_and_scale(company, doc)

    # Remaining fields each come from a single-purpose getter.
    trailing_getters = (
        ("incType", _get_inc_type),
        ("incIntro", _get_inc_intro),
        ("incUrl", _get_inc_url),
    )
    for attr, extract in trailing_getters:
        setattr(company, attr, extract(doc))

    return company
예제 #3
0
파일: format.py 프로젝트: haogods/etl_task
def construct_jd_raw(item):
    """Assemble a JdRaw record from the attributes of ``item``.

    - jdId, jdFrom and jdUrl are copied over unchanged.
    - pubDate and the copied company/job fields are wrapped as
      ``DtString(value, getFlag(value))``.
    - Fields marked "not needed for quantification" get an empty
      ``DtString()``.
    - A JdRemedyRaw filled only with placeholder values (empty
      ``DtString()``, single-element ``[DtString()]`` lists, and an empty
      string for jdId) is attached as ``remedyInfo``.

    Returns the populated JdRaw.
    """

    def flagged(field):
        # Pair the item's value with the flag getFlag computes for it.
        return DtString(getattr(item, field), getFlag(getattr(item, field)))

    raw = JdRaw()
    raw.jdId = item.jdId
    raw.jdFrom = item.jdFrom
    raw.jdUrl = item.jdUrl
    raw.pubDate = flagged("pubDate")

    # Company information.
    company = JdIncRaw()
    for field in ("incName", "incType", "incIntro", "incIndustry", "incScale"):
        setattr(company, field, flagged(field))
    company.incUrl = DtString()  # not needed for quantification

    # Job information.
    job = JdJobRaw()
    for field in (
        "jobPosition",
        "jobSalary",
        "jobWorkLoc",
        "jobDiploma",
        "jobWorkAge",
    ):
        setattr(job, field, flagged(field))
    job.jobDescription = DtString()  # not needed for quantification
    job.jobCate = flagged("jobCate")
    job.jobType = DtString()  # not needed for quantification
    job.jobWelfare = DtString()  # not needed for quantification

    raw.jdJob = job
    raw.jdInc = company

    # Remedy information: every field gets a placeholder, in this order.
    remedy = JdRemedyRaw()
    list_valued = ("cert", "incTags", "jobTags", "skills")
    remedy_fields = (
        "age",
        "cert",
        "diploma",
        "endTime",
        "gender",
        "incName",
        "incTags",
        "jdId",
        "jobPosition",
        "jobTags",
        "jobWelfare",
        "major",
        "pubTime",
        "salary",
        "skills",
        "workDemand",
        "workLoc",
        "workDuty",
        "workExp",
    )
    for field in remedy_fields:
        if field == "jdId":
            placeholder = ""  # the only plain-string field
        elif field in list_valued:
            placeholder = [DtString()]
        else:
            placeholder = DtString()
        setattr(remedy, field, placeholder)

    raw.remedyInfo = remedy

    return raw