Esempio n. 1
0
def getCompanyInfo(dom):
    """Build a ``Company`` record from a parsed company page.

    Args:
        dom: Document object that is called with CSS selectors
            (presumably a PyQuery instance -- confirm against the caller).

    Returns:
        A ``Company`` whose name, industry, type, address, website and
        description are taken from the page.  ``scale_low`` / ``scale_high``
        are the two digit strings matched in the company-size field, or
        ``None`` when that field is missing or does not look like ``N-M``.
        ``email`` and ``phone_num`` are always ``None`` and ``etag`` is
        always the empty string.
    """
    info_root = dom('.companyInfoItems')

    # Each table row contributes one entry: the label is the text of the
    # first cell up to the first ':', the value is the text of the second.
    parsed_rows = [pq(row) for row in info_root('.companyInfoTab tr')]
    fields = {
        row('td:eq(0)').text().split(u':')[0]: row('td:eq(1)').text()
        for row in parsed_rows
    }

    # The size range is read before the contact entries are merged in, so
    # a contact entry with the same label cannot affect it.
    size_text = fields.get(u'公司规模')
    size_match = re.search(r'(\d+)-(\d+)', size_text) if size_text else None
    size_low, size_high = size_match.groups() if size_match else (None, None)

    # Job-contact section: only text that splits into exactly two parts on
    # ':' is kept; such entries may overwrite labels from the table above.
    for node in dom('.jobContact>div>div').find('div'):
        pieces = pq(node).text().split(u':')
        if len(pieces) != 2:
            continue
        fields[pieces[0]] = pieces[1]

    company = Company()
    company.name = info_root('.companyTitle').text()
    company.industry = fields.get(u'公司行业')
    company.type = fields.get(u'公司类型')
    company.address = fields.get(u'公司地址')
    company.website = fields.get(u'公司主页')
    company.scale_low = size_low
    company.scale_high = size_high
    company.email = None
    company.phone_num = None
    company.description = dom('.black12 tr:eq(2)').find('td').html()
    company.etag = ''
    return company
Esempio n. 2
0
def getCompanyInfo(dom):
    """Collect the information of one company into a ``Company`` object.

    Args:
        dom: Document object queried by calling it with CSS selectors
            (presumably a PyQuery instance -- confirm against the caller).

    Returns:
        A ``Company`` populated from the page: ``name``, ``industry``,
        ``type``, ``address``, ``website`` and ``description`` come from the
        markup; ``scale_low`` / ``scale_high`` hold the two digit strings of
        an ``N-M`` company-size value (both ``None`` otherwise); ``email``
        and ``phone_num`` are set to ``None`` and ``etag`` to ``''``.
    """
    summary = dom('.companyInfoItems')
    table_rows = summary('.companyInfoTab tr')

    # Label (first cell, text before the first ':') -> value (second cell).
    details = {}
    for raw_row in table_rows:
        row = pq(raw_row)
        label_cell, value_cell = row('td:eq(0)'), row('td:eq(1)')
        details[label_cell.text().split(u':')[0]] = value_cell.text()

    # Company size is looked up here, before contact entries are merged.
    low = high = None
    scale_text = details.get(u'公司规模')
    if scale_text:
        found = re.search(r'(\d+)-(\d+)', scale_text)
        if found:
            low, high = found.groups()

    # Job Contact: merge every entry whose text splits into exactly two
    # parts on ':'; later entries win over earlier ones and over the table.
    contact_nodes = dom('.jobContact>div>div').find('div')
    contact_pairs = [pq(node).text().split(u':') for node in contact_nodes]
    details.update(pair for pair in contact_pairs if len(pair) == 2)

    result = Company()
    result.name = summary('.companyTitle').text()
    result.industry = details.get(u'公司行业')
    result.type = details.get(u'公司类型')
    result.address = details.get(u'公司地址')
    result.website = details.get(u'公司主页')
    result.scale_low, result.scale_high = low, high
    result.email = None
    result.phone_num = None
    result.description = dom('.black12 tr:eq(2)').find('td').html()
    result.etag = ''

    return result