def assembly_detail(cls, company: Company, raw_company_detail: dict):
    """Copy detail-level fields from a raw detail record onto ``company``.

    Every target attribute is looked up in ``raw_company_detail`` by key;
    when the key is absent the placeholder listed in the table is stored.

    Args:
        cls: Receiver supplied by the caller; not used in the body.
        company: Object whose attributes are overwritten in place.
        raw_company_detail: Mapping holding the raw detail fields.
    """
    # (attribute on company, key in the raw record, placeholder when absent)
    field_table = (
        ('homepage', 'websiteList', '-'),
        ('register_code', 'regNumber', '-'),
        ('organization_code', 'orgNumber', '-'),
        ('english_name', 'property3', '-'),
        ('authorization', 'regInstitute', '-'),
        ('actual_capital', 'actualCapital', '缺省'),
        ('industry', 'industry', '-'),
        ('used_name', 'historyNames', '-'),
    )
    for attribute, source_key, placeholder in field_table:
        setattr(company, attribute,
                raw_company_detail.get(source_key, placeholder))
def assembly_detail(cls, company: Company, raw_company_detail: dict):
    """Copy detail-level fields from a raw detail record onto ``company``.

    Missing keys fall back to ``'-'`` (``'缺省'`` for ``actual_capital``).
    The homepage is truncated to its first 30 characters and the industry
    name is read from the nested ``Industry`` mapping.

    ``dict.get`` only applies its default when the key is absent, so a key
    that is present with a null value used to make the homepage slice and
    the nested industry lookup raise. Both now fall back to ``'-'``.

    Args:
        cls: Receiver supplied by the caller; not used in the body.
        company: Object whose attributes are overwritten in place.
        raw_company_detail: Mapping holding the raw detail fields.
    """
    website = raw_company_detail.get('WebSite', '-')
    # None cannot be sliced; treat a null website like a missing one.
    company.homepage = '-' if website is None else website[0:30]
    company.register_code = raw_company_detail.get('No', '-')
    company.organization_code = raw_company_detail.get('OrgNo', '-')
    company.english_name = raw_company_detail.get('EnglishName', '-')
    company.authorization = raw_company_detail.get('BelongOrg', '-')
    company.actual_capital = raw_company_detail.get('RealCapi', '缺省')
    industry = raw_company_detail.get('Industry')
    # A missing or null 'Industry' entry has no nested name to read.
    company.industry = '-' if industry is None else industry.get('Industry', '-')
    company.used_name = raw_company_detail.get('OriginalName', '-')
def start():
    """Entry point: crawl every configured keyword and log each company.

    Keywords are read from the module-level ``keywords`` global (an empty
    list when it is not defined). For each keyword the search results are
    fetched, and every result is assembled into a ``Company`` together with
    its detail record, logged, and then cleared for reuse.
    """
    for key in globals().get('keywords', list()):
        found = tyc_client.search(key)
        # The logged estimate is two seconds per search result.
        estimate = 2 * len(found)
        log.info('正在处理爬取[%s],大概需要%s秒' % (key, estimate))
        # One Company instance is reused for all results of this keyword.
        company = Company()
        for item in found:
            company.keyword = key
            manager.assembly(company, item)
            # Fetch and merge the detail record for this result.
            detail = tyc_client.search_detail(item.get('id'))
            manager.assembly_detail(company, detail)
            log.info(company)
            # NOTE: persistence is disabled here; the save(company.__dict__)
            # call is commented out.
            company.clear()
    log.info("completed")
def start():
    """Crawl every configured keyword and log each assembled company.

    Keywords are read from the module-level ``keywords`` global. When that
    global is undefined or empty, nothing is crawled and only the final
    "completed" message is logged; previously an undefined ``keywords``
    raised ``TypeError`` because ``None`` was iterated.
    """
    # Fall back to an empty list so a missing global is a no-op, not a crash.
    keywords = globals().get('keywords') or []
    for keyword in keywords:
        raw_companies = qcc_client.search(keyword)
        log.info('正在处理爬取[%s]' % keyword)
        # One Company instance is reused for all results of this keyword.
        company = Company()
        for raw_company in raw_companies:
            company.keyword = keyword
            # Fill in the basic company information.
            manager.assembly(company, raw_company)
            raw_company_detail = qcc_client.search_detail(raw_company.get('KeyNo'))
            # Add the detailed company information.
            manager.assembly_detail(company, raw_company_detail)
            # NOTE: saving to the database is disabled here; the
            # save(company.__dict__) call is commented out.
            log.info(company)
            company.clear()
    log.info('completed')
def start():
    """Crawl every configured keyword and save each assembled company.

    Keywords come from the module-level ``keywords`` global; when it is
    undefined or empty no crawling happens. Each search result is assembled
    into a ``Company`` together with its detail record, saved via ``save``,
    and the instance is then cleared for the next result.
    """
    configured = globals().get('keywords')
    # A falsy value (undefined or empty) means there is nothing to iterate.
    for keyword in configured or ():
        results = qcc_client.search(keyword)
        # The logged estimate is two seconds per result plus four.
        estimate = 2 * len(results) + 4
        log.info('正在处理爬取[%s],大概需要%s秒' % (keyword, estimate))
        # One Company instance is reused for all results of this keyword.
        company = Company()
        for entry in results:
            company.keyword = keyword
            # Fill in the basic company information.
            manager.assembly(company, entry)
            # Add the detailed company information.
            detail = qcc_client.search_detail(entry.get('KeyNo'))
            manager.assembly_detail(company, detail)
            # Save to the database.
            save(company.__dict__)
            # Reset the current object.
            company.clear()
    log.info('completed')
def assembly(cls, company: Company, raw_company: dict):
    """Copy the basic fields of a raw search result onto ``company``.

    Each attribute is read from ``raw_company`` by key, with ``'-'`` stored
    when the key is absent. The coordinates are stored as the string form
    of a ``{'lat': ..., 'long': ...}`` dict built from the ``X`` and ``Y``
    entries.

    Args:
        cls: Receiver supplied by the caller; not used in the body.
        company: Object whose attributes are overwritten in place.
        raw_company: Mapping holding the raw search-result fields.
    """
    missing = '-'
    # (attribute on company, key in the raw record)
    # TODO: region, city and district are all filled from 'AreaCode' for now.
    # 'CreditCode' feeds both credit_code and taxpayer_code.
    leading_fields = (
        ('name', 'Name'),
        ('representative', 'OperName'),
        ('address', 'Address'),
        ('region', 'AreaCode'),
        ('city', 'AreaCode'),
        ('district', 'AreaCode'),
        ('biz_status', 'Status'),
        ('credit_code', 'CreditCode'),
        ('email', 'Email'),
        ('phone', 'ContactNumber'),
        ('biz_scope', 'Scope'),
        ('company_type', 'EconKind'),
        ('taxpayer_code', 'CreditCode'),
        ('registered_capital', 'RegistCapi'),
    )
    for attribute, source_key in leading_fields:
        setattr(company, attribute, raw_company.get(source_key, missing))

    coordinates = dict(
        lat=raw_company.get('X', missing),
        long=raw_company.get('Y', missing),
    )
    company.lat_long = str(coordinates)
    company.setup_time = raw_company.get('StartDate', missing)
def assembly(cls, company: Company, raw_company: dict):
    """Copy the basic fields of a raw search result onto ``company``.

    Most attributes are copied as-is, with ``'-'`` stored when the key is
    absent. A few are cleaned up first:

    * ``name`` has the ``<em>`` / ``</em>`` markers removed.
    * ``email`` keeps only the first ``;``-separated entry, tabs removed.
    * ``company_type`` has tabs removed.
    * ``setup_time`` keeps only the first 10 characters.

    The ``emails`` fallback used to be the list ``['-']``, so a record
    without that key raised ``AttributeError`` on ``.split``. The fields
    that chain string operations now fall back to ``'-'`` when the key is
    missing or its value is null.

    Args:
        cls: Receiver supplied by the caller; not used in the body.
        company: Object whose attributes are overwritten in place.
        raw_company: Mapping holding the raw search-result fields.
    """
    def text(key):
        # Value for ``key``, or '-' when the key is missing or null, so the
        # string methods chained below always have a str to work on.
        value = raw_company.get(key)
        return '-' if value is None else value

    company.name = text('name').replace('<em>', '').replace('</em>', '')
    company.representative = raw_company.get('legalPersonName', '-')
    company.address = raw_company.get('regLocation', '-')
    company.region = raw_company.get('base', '-')
    company.city = raw_company.get('city', '-')
    company.district = raw_company.get('district', '-')
    company.biz_status = raw_company.get('regStatus', '-')
    company.credit_code = raw_company.get('creditCode', '-')
    company.email = text('emails').split(';')[0].replace('\t', '')
    company.phone = raw_company.get('phoneNum', '-')
    company.biz_scope = raw_company.get('businessScope', '-')
    company.company_type = text('companyOrgType').replace('\t', '')
    # 'creditCode' feeds both credit_code and taxpayer_code.
    company.taxpayer_code = raw_company.get('creditCode', '-')
    company.registered_capital = raw_company.get('regCapital', '-')
    company.lat_long = str({
        'lat': raw_company.get('latitude', '-'),
        'long': raw_company.get('longitude', '-'),
    })
    company.setup_time = text('estiblishTime')[0:10]