def create_or_update(data):
    """Insert a Company built from ``data``, or update the one with the same name.

    The lookup key is ``data['name']``. When no row matches, a new ``Company``
    is created from ``data``; otherwise every matching row is updated with the
    values in ``data``. The session is committed in both cases.

    Args:
        data: mapping of Company column names to values; must contain 'name'.

    Raises:
        KeyError: if ``data`` has no 'name' entry.
    """
    query = session.query(Company).filter_by(name=data['name'])
    if query.first() is None:
        session.add(Company(**data))
    else:
        query.update(data)
    # Commit on both paths: previously only the insert branch committed, so
    # the bulk UPDATE was left pending in the open transaction.
    session.commit()
def start():
    """Crawl company data for every keyword in the module-level ``keywords``.

    For each keyword the search client is queried, and every returned record
    is assembled into a ``Company`` (basic fields first, then the detail
    record looked up by the record's 'KeyNo') and logged.
    """
    # A missing global ``keywords`` yields None; treat it as "nothing to do".
    keywords = globals().get('keywords') or []
    for keyword in keywords:
        # A falsy search result is treated as "no hits" instead of crashing.
        raw_companies = qcc_client.search(keyword) or []
        log.info('正在处理爬取[%s]' % keyword)
        for raw_company in raw_companies:
            # Fresh entity per result: a single shared instance would carry
            # fields of the previous company over to the next one.
            company = Company()
            company.keyword = keyword
            # Fill in the basic company information.
            manager.assembly(company, raw_company)
            raw_company_detail = qcc_client.search_detail(
                raw_company.get('KeyNo'))
            # Supplement with the detailed company information.
            manager.assembly_detail(company, raw_company_detail)
            log.info(company)
    log.info('completed')
def __post_process__(self):
    """Convert the raw hits in ``self.src`` into Company entities.

    For every hit, the detail endpoint (``TycEntApi`` formatted with the hit's
    ``id``) is requested through a proxy. Hits with an empty response, or
    whose parsed response does not report ``state == 'ok'``, are skipped.
    The remaining hits are copied into a new ``Company`` and appended to
    ``self.companies``.
    """
    if not self.src:
        return

    for hit in self.src:
        raw_detail = Request(TycEntApi.format(eid=hit.get("id")),
                             proxy=True).data
        if not raw_detail:
            continue

        parsed = json.loads(raw_detail)
        if parsed.get("state") != 'ok':
            continue

        extras = parsed.get("data", {})
        entity = Company()
        helper = TycClient.TycEntHelper
        # Copy the main entity information from the search hit.
        helper.__copy_props__(hit, entity)
        # Copy organisation code and registered capital from the detail data.
        helper.__copy_extras__(extras, entity)
        self.companies.append(entity)
async def post(self):
    """Create a unit (Company) for a race from the submitted form arguments.

    Returns a dict whose ``code`` is:
        1  - unit saved,
        -1 - title missing,
        -2 - code missing (takes precedence when both are missing),
        -3 - an active record with the same code already exists in the race,
        0  - an exception occurred (it is logged, not raised).
    """
    response = {'code': 0}
    race_cid = self.get_argument('race_cid', '')
    saved_id = None
    try:
        title = self.get_argument('title', None)
        status = self.get_argument('status', None)  # status
        code = self.get_argument('code', None)

        if not (title and code):
            if not title:
                response['code'] = -1
            if not code:
                response['code'] = -2
        else:
            duplicates = await Company.count(
                dict(record_flag=1, code=code, race_cid=race_cid))
            if duplicates > 0:
                response['code'] = -3
            else:
                status = (STATUS_UNIT_MANAGER_ACTIVE if status == 'on'
                          else STATUS_UNIT_MANAGER_INACTIVE)
                unit = Company(title=title, code=code)
                unit.race_cid = race_cid
                unit.status = status
                unit.created_dt = datetime.datetime.now()
                unit.updated_id = self.current_user.oid
                saved_id = await unit.save()
                response['code'] = 1
    except Exception:
        # If a failure (e.g. a network problem) makes the front end show the
        # add as unsuccessful although the record was already stored, the
        # stored record is removed again.
        if saved_id:
            await Company.delete_by_ids([saved_id])
        logger.error(traceback.format_exc())
    return response
def __post_process__(self):
    """Build a fully populated Company entity for each raw hit in ``self.src``.

    Per hit, the entity receives: the search keyword, the basic info from the
    hit itself, the matching brand/agency record (best effort), and then the
    data from three remote calls (portrait, shareholders, managers). Each
    remote section is applied only when its response is non-empty and reports
    ``state == 'ok'``. Every entity is appended to ``self.companies``.
    """
    if not self.src:
        return

    def paged_request_body(graph_id, section):
        """Build the POST payload requesting page 1 (20 rows) of ``section``."""
        return {
            "graphId": graph_id,
            "hkVersion": 1,
            "typeList": {
                section: {
                    "pageNum": 1,
                    "pageSize": 20,
                    "required": "true"
                }
            }
        }

    for company in self.src:
        company_entity = Company()
        # Keyword the company was found with.
        company_entity.keyword = self.keyword
        # Basic information of the company itself.
        self.EntityHelper.__basic_info__(company, company_entity)

        try:
            # Funding stage / competitor information: first record whose
            # graphId equals this company's id. ``next`` raises StopIteration
            # when there is none, which is reported as a warning below.
            brand_and_agency = next(
                b_and_a for b_and_a in self.brand_and_agencies
                if company.get('id') == b_and_a.get('graphId'))
            self.EntityHelper.__another_info__(brand_and_agency,
                                               company_entity)
        except Exception:
            # Best effort only. ``Exception`` rather than a bare ``except`` so
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            logging.warning('竟品信息获取失败!')

        # Company detail (portrait).
        detail_resp = Request(TycPortraitApi.format(eid=company.get("id")),
                              proxy=True,
                              headers=REQUEST_HEADERS).data
        if detail_resp:
            company_portrait = json.loads(detail_resp)
            if company_portrait.get("state") == 'ok':
                self.EntityHelper.__additional__(
                    company_portrait.get("data", {}), company_entity)

        # Shareholder information.
        shareholder_resp = Request(
            TycShareholderPostApi,
            method='post',
            json=paged_request_body(company.get("id"), "shareHolder"),
            proxy=True,
            headers=REQUEST_HEADERS).data
        if shareholder_resp:
            company_shareholder = json.loads(shareholder_resp)
            if company_shareholder.get("state") == 'ok':
                self.EntityHelper.__shareholder__(
                    company_shareholder.get("data", {}).get("shareHolder", {}),
                    company_entity)

        # Senior management information.
        manager_resp = Request(
            TycEnterpriseManagerPostApi,
            method='post',
            json=paged_request_body(company.get("id"), "companyStaff"),
            proxy=True,
            headers=REQUEST_HEADERS).data
        if manager_resp:
            company_manager = json.loads(manager_resp)
            if company_manager.get("state") == 'ok':
                self.EntityHelper.__company_manager__(
                    company_manager.get("data", {}).get("companyStaff", {}),
                    company_entity)

        self.companies.append(company_entity)
async def post(self):
    """Register a weid for a student, company or teacher.

    The KYC verification part is skipped for now and can be added later.

    The ``category`` argument selects the kind of account: 0 = student,
    1 = company, anything else = teacher. A weid is requested from ``host``;
    for students and companies the record is also written through
    ``fisco_client`` and the returned transaction hash is stored. The new
    entity is then saved.

    :return: dict with ``respBody`` (the new weid, or '' when registration
        failed), ``errorCode`` and ``errorMessage``. Failures are logged and
        do not change ``errorCode`` / ``errorMessage``.
    """
    r_dict = {'respBody': '', 'errorCode': 200, 'errorMessage': 'success'}
    category = int(self.get_i_argument('category'))

    # NOTE(review): ``requests.post`` below is presumably the synchronous
    # ``requests`` library, i.e. a blocking call inside a coroutine - confirm
    # whether an async client should be used instead.
    if category == 0:
        stuId = self.get_i_argument('stuId')
        name = self.get_i_argument('name')
        school = self.get_i_argument('school')
        id_card = self.get_i_argument('id_card')
        try:
            res = requests.post(host, json=Args.register_weid).json()
            print(res)
            _, tx_hash = fisco_client.fisco_add_data(
                'insertStudent',
                [res['respBody'], stuId, id_card, name, school])
            student = Student()
            student.weid = res['respBody']
            student.school = school
            student.name = name
            student.stuId = stuId
            student.idCard = id_card
            student.tx_hash = tx_hash
            await student.save()
            r_dict['respBody'] = res['respBody']
        except Exception:
            logger.error(traceback.format_exc())
    elif category == 1:
        name = self.get_i_argument('name')
        location = self.get_i_argument('location')
        business = self.get_i_argument('business')
        try:
            res = requests.post(host, json=Args.register_weid).json()
            print(res)
            _, tx_hash = fisco_client.fisco_add_data(
                'insertCompany',
                [res['respBody'], name, location, business])
            company = Company()
            company.name = name
            company.location = location
            company.business = business
            company.weid = res['respBody']
            company.tx_hash = tx_hash
            await company.save()
            r_dict['respBody'] = res['respBody']
        except Exception:
            logger.error(traceback.format_exc())
    else:
        name = self.get_i_argument('name')
        school = self.get_i_argument('school')
        teacher_id = self.get_i_argument('teacher_id')
        try:
            # No fisco_client write is performed for teachers.
            res = requests.post(host, json=Args.register_weid).json()
            teacher = Teacher()
            teacher.weid = res['respBody']
            teacher.school = school
            teacher.name = name
            teacher.teacher_id = teacher_id
            await teacher.save()
            # These two values are set on the local response only; they are
            # not copied into r_dict.
            res['user_cid'] = teacher.cid
            res['category'] = 2
            r_dict['respBody'] = res['respBody']
        except Exception:
            # Log like the other two branches instead of silently dropping
            # the failure.
            logger.error(traceback.format_exc())
    return r_dict
async def __subject_import_excel(self, race_cid, excel_file_content):
    """Import the units (companies) of a race from the first sheet of an Excel file.

    Row 0 is the header and must contain exactly two non-empty cells. In each
    following row, column 0 is the unit code and column 1 the unit title.
    Rows whose code is empty, longer than 16 characters or not purely
    alphanumeric are skipped. A unit of this race that already has the same
    code is deleted and replaced; when a code occurs more than once in the
    file and no stored unit is found for the repeat, the repeat is skipped.
    New units are inserted in batches of 500.

    Args:
        race_cid: id of the race the units belong to.
        excel_file_content: raw bytes of the uploaded workbook.

    Returns:
        1 when the import ran, 2 when the sheet is empty or the header row
        does not have exactly two non-empty cells.
    """
    result_code = 1
    # Codes of all active units of this race. A set gives O(1) membership
    # tests inside the row loop.
    known_codes = set(await Company.distinct('code', {
        'race_cid': race_cid,
        'record_flag': 1
    }))

    book = xlrd.open_workbook(file_contents=excel_file_content)
    # Only the first sheet is read.
    sheet = book.sheet_by_index(0)

    # An empty sheet has no header row; report it as an invalid header
    # instead of failing with IndexError on row 0.
    if sheet.nrows == 0:
        return 2

    # Validate the header: exactly two non-empty title cells.
    title_list = [col for col in sheet.row_values(0) if col]
    if len(title_list) != 2:
        result_code = 2
        return result_code

    # Compiled once instead of once per row.
    code_pattern = re.compile(r'^[a-zA-Z0-9]*$')

    pending_units = []
    # Units that already existed and are overwritten by the upload.
    delete_unit_oid_list = []
    for rownum in range(1, sheet.nrows):
        row_data = sheet.row_values(rownum)
        code_repeat = False

        # Unit code; the helper's exact normalisation is not visible here.
        unit_code = str(self.__get_replace_data(row_data[0], 2))
        # The code must be non-empty, at most 16 characters, alphanumeric.
        if not unit_code or len(unit_code) > 16:
            continue
        if not code_pattern.match(unit_code):
            continue

        if unit_code in known_codes:
            # Scope the lookup to this race, matching the query that built
            # ``known_codes``; without it a unit of another race with the
            # same code could be found and deleted.
            unit = await Company.find_one(
                dict(code=unit_code, race_cid=race_cid))
            if unit:
                delete_unit_oid_list.append(unit.oid)
                await unit.delete()
            else:
                # Code repeated within the file: only one is added.
                code_repeat = True

        unit = Company()
        unit.race_cid = race_cid
        unit.code = unit_code
        # Non-string cells lose their last two characters after str()
        # (presumably the '.0' of numeric cells - TODO confirm).
        unit.title = str(row_data[1])[:-2] if not isinstance(
            row_data[1], str) else row_data[1]
        unit.status = STATUS_UNIT_MANAGER_ACTIVE
        known_codes.add(unit_code)

        if not code_repeat:
            pending_units.append(unit)
            if len(pending_units) == 500:
                await Company.insert_many(pending_units)
                pending_units = []

    if pending_units:
        await Company.insert_many(pending_units)
    await Company.delete_by_ids(delete_unit_oid_list)
    return result_code