def getCompanyInfo(dom):
    '''Build a Company record from a parsed company page.

    Args:
        dom: PyQuery-style document. It is called with CSS selectors and the
            returned selections are used via ``.text()``, ``.html()`` and
            ``.find()``.

    Returns:
        A new ``Company`` whose ``name``, ``industry``, ``type``,
        ``address``, ``website`` and ``description`` are read from the page.
        ``scale_low`` / ``scale_high`` are the two digit strings captured
        from the company-size field when it looks like ``<digits>-<digits>``,
        otherwise both are ``None``. ``email`` and ``phone_num`` are always
        ``None`` and ``etag`` is always the empty string.
    '''
    info_items = dom('.companyInfoItems')
    company_info = {}

    # Label/value table: the first cell holds "<label>:<...>", the second
    # cell holds the value.
    for row in info_items('.companyInfoTab tr'):
        cells = pq(row)
        # Some pyquery releases return None from .text() on an empty
        # selection (a row without <td> cells); fall back to u'' so such a
        # row is skipped instead of raising AttributeError on .split().
        label = (cells('td:eq(0)').text() or u'').split(u':')[0]
        if not label:
            continue
        company_info[label] = cells('td:eq(1)').text()

    # Company size: keep the lower/upper bounds as the matched digit strings.
    scale_low = scale_high = None
    scale_text = company_info.get(u'公司规模')
    if scale_text:
        match = re.search(r'(\d+)-(\d+)', scale_text)
        if match:
            scale_low, scale_high = match.groups()

    # Job contact section: every descendant <div> whose text is a single
    # "label:value" pair is merged into the same mapping, overriding any
    # table entry with the same label.
    # NOTE(review): entries containing more than one colon are ignored;
    # presumably this filters container divs whose text aggregates several
    # entries -- confirm before relaxing the check.
    for node in dom('.jobContact>div>div').find('div'):
        parts = pq(node).text().split(u':')
        if len(parts) == 2:
            label, value = parts
            company_info[label] = value

    com = Company()
    com.name = info_items('.companyTitle').text()
    com.industry = company_info.get(u'公司行业')
    com.type = company_info.get(u'公司类型')
    com.address = company_info.get(u'公司地址')
    com.website = company_info.get(u'公司主页')
    com.scale_low, com.scale_high = scale_low, scale_high
    com.email = None
    com.phone_num = None
    com.description = dom('.black12 tr:eq(2)').find('td').html()
    com.etag = ''
    return com