def quit_empl():
    """Handle employee-quit (delete) requests.

    Expects a POST with JSON body ``{'type': ..., 'data_id': ...}``:
    ``type == 'one'`` deletes a single record, otherwise ``data_id`` is a
    list of ids processed one by one.  Returns a Status JSON string.
    """
    if request.method == 'GET':
        return Status(201, 'failure', u'quit_empl API请求方法错误', {}).json()
    try:
        req_json = request.get_json()
        _type = req_json.get('type')
        if _type in ['one', u'one']:
            # single-record mode: delegate directly to the service
            return EmployeeService().quit_empl(req_json.get('data_id'))
        failed_ids = list()
        data_ids = req_json.get('data_id')
        for order_id in data_ids:
            if not order_id:
                continue
            res = EmployeeService().quit_empl(order_id)
            res = json.loads(res)
            if res.get('status_id') != 100:
                # robustness: ids may be ints — str() so join can't raise
                failed_ids.append(str(res.get('data').get('data_id')))
        if len(failed_ids) > 0:
            return Status(201, 'failure', u'订单:%s删除失败' % ','.join(failed_ids), {}).json()
        # bug fix: the all-deleted response carried the 'failure' label with
        # status 100; every other status-100 payload in this file is 'success'
        return Status(100, 'success', u'订单:%s删除成功' % ','.join(str(i) for i in data_ids), {}).json()
    except Exception as e:
        LOG.error("employee>quit_empl is error: %s" % e)
        return Status(101, 'failure', u'Server发生错误,获取失败', {}).json()
def adds(self, datas):
    """Bulk-insert enterprise records.

    :param datas: iterable of dicts (or JSON strings) describing companies.
    :returns: ``(success_list, failure_list)`` of company names, or ``None``
              when *datas* is empty.

    A record is skipped (counted as failure) when it has no ``credit_code``
    or when a row with the same code already exists.
    """
    if not datas:
        LOG.error('DB: data is null.')
        return
    failure_list = list()
    success_list = list()
    for data in datas:
        if not data:
            continue
        if isinstance(data, str):
            # bug fix: dict(<str>) raises TypeError — string entries are
            # JSON documents and must be parsed instead
            data = json.loads(data)
        credit_code = data.get('credit_code')
        name = data.get('name')
        if not credit_code:
            failure_list.append(name)
            continue
        model = self.enterprise_bo.get_by_code(credit_code)
        if model:
            # duplicate credit code: already stored
            failure_list.append(name)
            continue
        new_model = self.enterprise_bo.new_mode()
        new_model.name = name
        new_model.phone = data.get('phone')
        new_model.email = data.get('email')
        new_model.tyc_url = data.get('tyc_url')
        new_model.company_url = data.get('company_url')
        new_model.address = data.get('address')
        new_model.register_funds = data.get('register_funds')
        new_model.paidin_funds = data.get('paidin_funds')
        new_model.establish_date = data.get('establish_date')
        new_model.status = data.get('status')
        new_model.credit_code = credit_code
        new_model.company_type = data.get('company_type')
        new_model.industry = data.get('industry')
        new_model.business_term = data.get('business_term')
        new_model.resume = data.get('resume')
        new_model.business_scope = data.get('business_scope')
        new_model.key = data.get('key')
        new_model.create_time = get_now()
        new_model.city = data.get('city')
        new_model.sub_city = data.get('sub_city')
        try:
            self.enterprise_bo.add_model(new_model)
            success_list.append(name)
        except Exception as e:
            LOG.error('DB add error %s: %s' % (e, str(data)))
            failure_list.append(name)
    # the original for...else had no break, so the else always ran —
    # plain post-loop code is equivalent and clearer
    if success_list:
        LOG.info('success list:【%s】' % len(success_list))
    if failure_list:
        LOG.info('failure list:【%s】' % len(failure_list))
    return success_list, failure_list
def api_list_all():
    """List all employees.  Rejects GET; expects a POST JSON filter payload."""
    if request.method == 'GET':
        return Status(201, 'failure', u'api_list API请求方法错误', {}).json()
    try:
        # renamed from `json` — the original shadowed the json module
        req_json = request.get_json()
        res = EmployeeService().get_all(req_json)
    except Exception as e:
        LOG.error("employee>api_list is error: %s" % e)
        res = Status(101, 'failure', u'Server发生错误,获取失败', {}).json()
    return res
def add_or_edit_api():
    """Create or update an employee record.  Rejects GET; expects POST JSON."""
    if request.method == 'GET':
        return Status(201, 'failure', u'add_or_edit_api API请求方法错误', {}).json()
    try:
        # renamed from `json` — the original shadowed the json module
        req_json = request.get_json()
        res = EmployeeService().add_or_edit_empl(req_json)
    except Exception as e:
        LOG.error("employee>add_api is error: %s" % e)
        res = Status(101, 'failure', u'Server发生错误,新增失败', {}).json()
    return res
def api_post(url, headers=None, data=None, retry=1, resptype='json', **kwargs):
    """HTTP POST with optional proxy and up to 3 retries.

    :param url: target url (required)
    :param headers: dict, or a JSON string that parses to a dict
    :param data: dict (serialized to JSON) or pre-serialized body
    :param retry: internal retry counter, do not pass explicitly
    :param resptype: 'json' | 'text' | 'raw' | 'content'
    :return: (ok, payload) — payload is an error message / [] on failure
    """
    if not url:
        return False, 'api_post url is not allow null'
    # bug fix: mutable default arguments ({} shared across calls) replaced
    # with None sentinels
    headers = {} if headers is None else headers
    data = {} if data is None else data
    if isinstance(data, dict):
        data = json.dumps(data)
    if not isinstance(headers, dict):
        # bug fix: the original json.dumps()'d non-dict headers, producing a
        # string requests cannot use; a JSON string should be parsed instead
        headers = json.loads(headers)
    try:
        if not IS_PROXY_RUN:
            response = requests.post(url=url, headers=headers, data=data,
                                     timeout=5)
        else:
            random_ip = get_random_proxy()
            proxies = {'http': random_ip} if random_ip else {}
            response = requests.post(url=url, headers=headers, data=data,
                                     timeout=5, proxies=proxies)
    except Exception as e:
        if retry <= 3:
            random_sleep(1, 1.5)
            # bug fix: the retried call's result was discarded, making the
            # function return None on the retry path
            return api_post(url=url, headers=headers, data=data,
                            retry=retry + 1, resptype=resptype)
        LOG.error(u'@@@@@ %s api_post error: %s' % (url, e))
        return False, []
    else:
        respcode = response.status_code
        if respcode != 200:
            return False, 'api_post response status code is: %s' % respcode
        elif respcode == 200 and resptype == 'raw':
            return True, response.raw
        elif respcode == 200 and resptype == 'content':
            return True, response.content
        elif respcode == 200 and resptype == 'json':
            return True, response.json()
        else:
            return True, response.text
def adds(self, datas):
    """Bulk-insert enterprise records (tyt/registration variant).

    :param datas: iterable of company dicts.

    Prints a summary of inserted and skipped company names.
    """
    if not datas:
        LOG.error('DB: data is null.')
        # bug fix: previously fell through and iterated None -> TypeError
        return
    failure_list = list()
    success_list = list()
    for data in datas:
        if not data:
            continue
        credit_code = data.get('credit_code')
        name = data.get('name')
        if not credit_code:
            # consistency with the sibling adds(): without a credit code
            # there is no dedup key, so count as failure and skip
            failure_list.append(name)
            continue
        model = self.enterprise_bo.get_by_code(credit_code)
        if model:
            # duplicate credit code: already stored
            failure_list.append(name)
            continue
        new_model = self.enterprise_bo.new_mode()
        new_model.name = name
        new_model.phone = data.get('phone')
        new_model.email = data.get('email')
        new_model.tyt_url = data.get('tyt_url')
        new_model.company_url = data.get('company_url')
        new_model.address = data.get('address')
        new_model.register_funds = data.get('register_funds')
        new_model.paidin_funds = data.get('paidin_funds')
        new_model.establish_date = data.get('establish_date')
        new_model.status = data.get('status')
        new_model.credit_code = credit_code
        new_model.registration_number = data.get('registration_number')
        new_model.identification_number = data.get('identification_number')
        new_model.organization_code = data.get('organization_code')
        new_model.company_type = data.get('company_type')
        new_model.industry = data.get('industry')
        new_model.business_term = data.get('business_term')
        new_model.taxpayer_qualification = data.get('taxpayer_qualification')
        new_model.personnel_size = data.get('personnel_size')
        new_model.insured_num = data.get('insured_num')
        new_model.resume = data.get('resume')
        new_model.registered_address = data.get('registered_address')
        new_model.business_scope = data.get('business_scope')
        try:
            # consistency with the sibling adds(): one bad row must not
            # abort the whole batch
            self.enterprise_bo.add_model(new_model)
            success_list.append(name)
        except Exception as e:
            LOG.error('DB add error %s: %s' % (e, str(data)))
            failure_list.append(name)
    print('success list【%s】:%s' % (len(success_list), ','.join(success_list)))
    print('failure list【%s】:%s' % (len(failure_list), ','.join(failure_list)))
def upload_image():
    """Route handler: persist the uploaded avatar plus the profile form."""
    g.menuf = 'setter'
    g.menusub = 'user'
    avatar = request.files.get('avatar')
    try:
        res = SetterService().upload_info(avatar, request.form)
    except Exception as e:
        LOG.error("setter>upload_info is error: %s" % e)
        res = Status(101, 'failure', u'Server发生错误,获取失败', {}).json()
    LOG.info('%s update information' % get_user_id())
    return res
def upload_info(self, image_file, form):
    """Validate the profile form, optionally store the avatar, update the user.

    :param image_file: werkzeug FileStorage or None
    :param form: request form mapping
    :return: Status JSON string (202 validation error, 300 db error, 100 ok)
    """
    # bug fix: dict.iteritems() is Python 2 only — this codebase uses
    # f-strings elsewhere, i.e. Python 3, where it raises AttributeError
    for k, v in self.request_list.items():
        if not k or not v:
            continue
        if not form.get(k):
            return Status(202, 'failure', u'请完善%s信息在进行提交' % v, {}).json()
    db_image = ''
    if image_file:
        image_name = image_file.filename
        if not self.__allow_format_img(image_name):
            return Status(202, 'failure', u'图片格式支持:jpg、png、bmp、jpeg', {}).json()
        _base_dir = get_base_dir()
        now_date = get_now(format="%Y-%m-%d")

        def __get_filename_by_md5(file_name):
            # salt with the current timestamp so repeated uploads of the
            # same filename do not collide
            suffix = (os.path.splitext(file_name)[1]).lower()
            _v = get_now() + file_name
            return (md5(_v) + suffix)

        store_file_name = __get_filename_by_md5(image_name)
        # db stores the path relative to the upload base; disk path is
        # rooted at the project base dir
        db_image = os.path.join((UPLOAD_BASE_DIR + now_date), store_file_name)
        store_dir = _base_dir + UPLOAD_BASE_DIR + now_date
        if not os.path.exists(store_dir):
            mk_dirs(store_dir)
        image_file.save(os.path.join(store_dir, store_file_name))
    try:
        user_mode = self.sysuser_bo.get_user_by_params(get_user_id())
        if db_image:
            user_mode.image = db_image
        ret_image = db_image if db_image else user_mode.image
        user_mode.fullname = form.get('fullname')
        user_mode.phone = form.get('phone')
        if form.get('email'):
            user_mode.email = form.get('email')
        self.sysuser_bo.merge_model(user_mode)
    except Exception as e:
        LOG.error("upload info is error: %s" % e)
        return Status(300, 'failure', u'upload info更新db失败', {}).json()
    return Status(100, 'success', u'信息完善成功!', {'image': ret_image}).json()
def _process_by_key(self):
    """Fan crawl work out over a process pool, one task per page window.

    For every search key, asks the client for its pagination info, splits
    the page span into windows of PAGINATION pages, and schedules each
    window as an async pool task.  Results are pushed by the workers onto
    self.q and drained into self.ret_res_list after the pool joins, then
    handed to to_store().
    """
    # leave one core free when more than 2 CPUs are available
    pool = multiprocessing.Pool(processes=(MAX_CPU - 1 if MAX_CPU > 2 else 1))
    LOG.info('Main process: %s, run cpu count: %s' %
             (os.getpid(), (MAX_CPU - 1 if MAX_CPU > 2 else 1)))
    process = list()
    for key in self.keys:
        if not key:
            continue
        min_page, max_page, max_pagination, max_range = \
            self.tyc_client.get_pagination(key)
        LOG.info('[%s][%s]spider page: %s ~ %s ||| max_pagination: %s ||| max range: %s'
                 % (RUN_MODE, key, min_page, max_page, max_pagination, max_range))
        if not max_range:
            # a missing range means pagination probing failed; abort the run
            LOG.error("It's not have max range")
            sys.exit()
        for i in range(0, max_range, 1):
            # slide the [min_page, max_page] window forward by PAGINATION
            # pages each task, clamped at max_pagination
            max_page = min_page + PAGINATION
            if max_page > max_pagination:
                max_page = max_pagination
            process.append(
                pool.apply_async(self.tyc_client.work_by_key,
                                 args=(key, min_page, max_page, self.q,
                                       self.citys_list, self.sub_citys_mapping))
            )
            min_page = max_page + 1
    pool.close()
    pool.join()
    # drain the result queue; NOTE(review): the bare except keeps looping,
    # which could spin forever if get_nowait() raises persistently — confirm
    while 1:
        try:
            if self.q.empty():
                break
            self.ret_res_list.append(self.q.get_nowait())
        except:
            pass
    self.to_store(self.keys, MIN_PAGE, MAX_PAGE)
def work_by_key(self, key):
    """Walk search-result pages for *key* and return the scraped records.

    Each record carries the company name, its tyt_url, and whatever
    detail_by_url() extracts from the company page.
    """
    results = []
    if not key:
        LOG.error("【%s】key is null, no work." % RUN_MODE)
        return results
    for page in range(self.MAX_PAGE):
        url = TYC_SEARCH_API + '/p%s?key=' % page + parse.quote(key)
        print(url)
        ok, body = api_get(url=url, headers=self.headers, data={},
                           resptype='text')
        if not ok:
            continue
        # company anchors are tagged with this tracking attribute
        anchors = BeautifulSoup(body, 'lxml').find_all(
            'a', attrs={"tyc-event-ch": "CompanySearch.Company"})
        for anchor in anchors:
            if not anchor or not anchor.attrs.get('href'):
                continue
            record = {
                'tyt_url': anchor.get('href').strip(),
                'name': anchor.get_text().strip(),
            }
            record.update(self.detail_by_url(record.get('tyt_url')))
            print(record['name'], record['tyt_url'], str(bool(record)))
            results.append(record)
            random_sleep()
    return results
def to_excel(self, datas, columns: dict, exlname=None):
    """Write *datas* (a list of dicts) to an .xls workbook.

    :param datas: rows; each dict is looked up by the keys of *columns*
    :param columns: mapping of data key -> column title
    :param exlname: output path; defaults to <excel folder>/<timestamp>.xls
    :return: the file path on success, False on invalid input
    """
    if not datas:
        LOG.error('to excel datas is null')
        return False
    if not columns:
        LOG.error('to excel columns is null')
        return False
    if not isinstance(columns, dict):
        LOG.error('to excel columns is need dict')
        return False
    if not exlname:
        exlname = os.path.join(get_excel_folder(), '%s.xls' % get_now())
    f = xlwt.Workbook(encoding='utf-8')
    sheet = f.add_sheet('sheet', cell_overwrite_ok=True)
    # bug fix: work on a copy so the caller's `columns` dict is not mutated
    # by the injected 'ID' column
    header_map = dict(columns)
    row0 = list(header_map.keys())
    row0.insert(0, 'ID')
    header_map.update({'ID': '序号'})

    def _font_style(bold):
        # Times New Roman, color_index 4, height 220; title is bold
        style = xlwt.XFStyle()
        font = xlwt.Font()
        font.name = 'Times New Roman'
        font.bold = bold
        font.color_index = 4
        font.height = 220
        style.font = font
        return style

    style_title = _font_style(True)
    style_content = _font_style(False)
    # header row
    for i in range(0, len(row0)):
        sheet.write(0, i, header_map.get(row0[i]), style_title)
    row = 1
    for line in datas:
        if not line:
            continue
        for index, col_key in enumerate(row0):
            if index == 0:
                # first column is the 1-based row number
                sheet.write(row, index, row, style_title)
            else:
                sheet.write(row, index, line.get(col_key), style_content)
        row += 1
    f.save(exlname)
    return exlname
def _is_not_max_range_die(self, max_range): if not max_range: LOG.error("It's not have max range") sys.exit()
def work_by_key(self, key):
    """Crawl every search page for *key*, inserting each company via _insert
    and fetching its detail page.

    Flow: probe page 1 up to 9 times to discover the total page count, then
    walk each page, re-fetching with fresh cookies whenever the result list
    is empty (anti-bot verification) or the logged-in user marker is gone.
    """
    print(key, '@' * 100)
    ret_res = list()
    if not key:
        LOG.error("【%s】key is null, no work." % RUN_MODE)
        return ret_res
    # --- probe for pagination: retry page 1 until a response succeeds ---
    is_page = False
    for ct in range(9):
        url = '%s/p%s?key=%s' % (TYC_SEARCH_API, 1, parse.quote(key))
        is_ok, search_resp = api_get(url=url, headers=self.headers,
                                     data={}, resptype='text')
        self.headers['Cookie'] = cookies_get()
        if is_ok:
            is_page = True
            break
    page_vlas = 200  # fallback page count when probing fails
    if not is_page:
        page_vlas = 200
    else:
        # read the last pager entry (total pages) out of the result footer
        et_late = etree.HTML(search_resp)
        page_num = [
            i.xpath('./li/a/text()')[-2] for i in et_late.xpath(
                '//div[@class="result-footer"]/div[@class=" search-pager"]/ul'
            )
        ]
        if page_num:
            page_vlas = str(page_num[0]).replace('.', '')
    LOG.critical(f'搜索关键词为:{key}, 总页面:{page_vlas}------------------------')
    print(f'搜索关键词为:{key}, 总页面:{page_vlas}------------------------')
    # --- company list pages ---
    for page in range(1, int(page_vlas), 1):
        self.headers['Cookie'] = cookies_get()
        url = '%s/p%s?key=%s' % (TYC_SEARCH_API, page, parse.quote(key))
        print(url, 'Q' * 80)
        is_ok, search_resp = api_get(url=url, headers=self.headers,
                                     data={}, resptype='text')
        if not is_ok:
            continue
        soup = BeautifulSoup(search_resp, 'lxml')
        tags = soup.find_all(
            'a', attrs={"tyc-event-ch": "CompanySearch.Company"})

        def while_req(url):
            # re-fetch the same page with a freshly rotated cookie
            self.headers['Cookie'] = cookies_get()
            sub_is_ok, sub_search_resp = api_get(url=url,
                                                 headers=self.headers,
                                                 data={}, resptype='text')
            return sub_is_ok, sub_search_resp

        HTNL = etree.HTML(search_resp)
        print(
            HTNL.xpath(
                '//*[@id="web-content"]/div/div[1]/div[3]/div[2]/div[1]/div/div[3]/div[1]/a/text()'
            ), 'A' * 80)
        # manual anti-bot verification: no company anchors usually means a
        # captcha page — sleep, rotate cookie, and retry until anchors appear
        if len(tags) == 0:
            while 1:
                if is_ok and len(tags) > 0:
                    break
                else:
                    print(url)
                    LOG.critical('验证############### %s ###############' % url)
                    random_sleep(20, 25)
                    self.headers['Cookie'] = cookies_get()
                    is_ok, search_resp = while_req(url)
                    soup = BeautifulSoup(search_resp, 'lxml')
                    tags = soup.find_all(
                        'a',
                        attrs={"tyc-event-ch": "CompanySearch.Company"})
        # check the logged-in user marker; an empty marker means the session
        # was dropped, so refresh the page once with a new cookie
        eto = etree.HTML(search_resp)
        user_name = eto.xpath('//div[@nav-type="user"]/a/text()')
        is_success = False
        for i in range(9):
            if not ''.join(user_name):
                self.headers['Cookie'] = cookies_get()
                is_ok, search_resp = while_req(url)
                soup = BeautifulSoup(search_resp, 'lxml')
                tags = soup.find_all(
                    'a', attrs={"tyc-event-ch": "CompanySearch.Company"})
            # NOTE(review): the original single-line source is ambiguous —
            # these two statements are placed at loop level (first iteration
            # always succeeds); confirm against upstream history
            is_success = True
            break
        if is_success:
            for tag in tags:
                if not tag or not tag.attrs.get('href'):
                    continue
                res_dict = dict()
                res_dict['tyt_url'] = tag.get('href').strip()
                res_dict['name'] = tag.get_text().strip()
                res_dict['company_id'] = str(
                    tag.get('href')).split('/')[-1]
                res_dict['label_index'] = str(key)
                res_dict['rquest_url'] = url
                res_dict['source'] = '天眼查'
                res_dict['created_time'] = str(datetime.now())
                result = _insert(res_dict)
                if result.get('status', False):
                    c_id = str(result.get('_id'))
                    # detail fetch is best effort: retried once, then dropped
                    try:
                        # detail_res = self.detail_by_url(res_dict.get('tyt_url'))
                        self.detail_by_url(res_dict.get('tyt_url'), c_id)
                    except:
                        try:
                            self.detail_by_url(res_dict.get('tyt_url'), c_id)
                        except:
                            pass
                ret_res.append(res_dict)
                random_sleep(1, 2.5)
            # break
        # break
    return ret_res
def work_by_key(self, key):
    """Crawl search pages for *key*, persisting each hit via save_list.

    :param key: search keyword
    :return: list of {'tyt_url', 'name'} dicts scraped from the results
    """
    ret_res = list()
    if not key:
        LOG.error("【%s】key is null, no work." % RUN_MODE)
        return ret_res
    # page
    for page in range(1, self.MAX_PAGE, 1):
        url = '%s/p%s?key=%s' % (TYC_SEARCH_API, page, parse.quote(key))
        print(url)
        print(cookies_get())
        self.headers['Cookie'] = cookies_get()
        is_ok, search_resp = api_get(url=url, headers=self.headers,
                                     data={}, resptype='text')
        if not is_ok:
            continue
        # keep the raw page on disk for debugging
        with open('company_list.html', 'w', encoding='utf-8') as wf:
            wf.write(search_resp)
        soup = BeautifulSoup(search_resp, 'lxml')
        tags = soup.find_all(
            'a', attrs={"tyc-event-ch": "CompanySearch.Company"})

        def while_req(url):
            # re-fetch the same page (cookie is rotated by the caller)
            sub_is_ok, sub_search_resp = api_get(url=url,
                                                 headers=self.headers,
                                                 data={}, resptype='text')
            return sub_is_ok, sub_search_resp

        # manual anti-bot verification: retry with fresh cookies until the
        # company anchors reappear
        if len(tags) == 0:
            while 1:
                if is_ok and len(tags) > 0:
                    break
                else:
                    LOG.critical('验证############### %s ###############' % url)
                    random_sleep(20, 25)
                    self.headers['Cookie'] = cookies_get()
                    is_ok, search_resp = while_req(url)
                    soup = BeautifulSoup(search_resp, 'lxml')
                    tags = soup.find_all(
                        'a',
                        attrs={"tyc-event-ch": "CompanySearch.Company"})
        for tag in tags:
            if not tag or not tag.attrs.get('href'):
                continue
            res_dict = dict()
            res_dict['tyt_url'] = tag.get('href').strip()
            res_dict['name'] = tag.get_text().strip()
            self.save_list(
                tag.get('href').strip() + '-' + tag.get_text().strip())
            print(res_dict)
            ret_res.append(res_dict)
            random_sleep(1, 2.5)
    # bug fix: the function previously fell off the end and returned None;
    # every sibling work_by_key variant returns the collected records
    return ret_res
def work_by_key(self, key, min_page, max_page, type='default', queue=None,
                cid=None, sub_cid=None, city_info=None, sub_city_info=None):
    """Crawl pages [min_page, max_page] for *key*, optionally filtered by city.

    :param type: 'default' | 'city' | 'sub_city' — controls the query params
    :param queue: optional multiprocessing queue; each record is also put here
    :param cid: city id, used to detect municipalities (ZXS_CITY_IDS)
    :param city_info / sub_city_info: dicts with at least 'name'/'full_name'
           (and 'code' for sub-city) — assumed schema, confirm with caller
    :return: list of scraped record dicts
    """
    ret_res = list()
    if not key:
        LOG.error("【%s】key is null, no work." % RUN_MODE)
        return ret_res
    # page
    for page in range(min_page, max_page + 1, 1):
        # build the search URL from API_MODE (tyc vs pro) and the area filter
        if API_MODE == 'tyc' and type == 'default':
            url = '%s/p%s?key=%s' % (TYC_SEARCH_API, page, parse.quote(key))
        elif API_MODE == 'tyc' and type == 'city':
            url = '%s/p%s?key=%s&base=%s' % (TYC_SEARCH_API, page,
                                             parse.quote(key),
                                             city_info.get('name'))
        elif API_MODE == 'tyc' and type == 'sub_city':
            # municipalities need an explicit areaCode
            if cid in ZXS_CITY_IDS:
                url = '%s/p%s?key=%s&base=%s&areaCode=%s' % (
                    TYC_SEARCH_API, page, parse.quote(key),
                    sub_city_info.get('name'), sub_city_info.get('code'))
            else:
                url = '%s/p%s?key=%s&base=%s' % (TYC_SEARCH_API, page,
                                                 parse.quote(key),
                                                 sub_city_info.get('name'))
        elif API_MODE == 'pro' and type == 'default':
            url = '%s/p%s?key=%s' % (TYC_PRO_SEARCH_API, page,
                                     parse.quote(key))
        elif API_MODE == 'pro' and type == 'city':
            url = '%s/p%s?key=%s&base=%s' % (TYC_PRO_SEARCH_API, page,
                                             parse.quote(key),
                                             city_info.get('name'))
        elif API_MODE == 'pro' and type == 'sub_city':
            if cid in ZXS_CITY_IDS:
                url = '%s/p%s?key=%s&base=%s&areaCode=%s&baseArea=%s' \
                    % (TYC_PRO_SEARCH_API, page, parse.quote(key),
                       city_info.get('name'), sub_city_info.get('code'),
                       parse.quote(sub_city_info.get('name')))
            else:
                url = '%s/p%s?key=%s&base=%s' % (TYC_PRO_SEARCH_API, page,
                                                 parse.quote(key),
                                                 sub_city_info.get('name'))
        else:
            LOG.critical('====== API_MODE is not in [tyc, pro] ======')
            sys.exit(1)
        LOG.info('%s[%s]%s' % (key, API_MODE, url))
        self.headers['Referer'] = url
        is_ok, search_resp = api_get(url=url, headers=self.headers,
                                     data={}, resptype='text')
        if not is_ok:
            continue
        # skip pages the dedup/no-result check flags
        if self.check_no(url, _type='page'):
            continue
        soup = BeautifulSoup(search_resp, 'lxml')
        tags = soup.find_all(
            'a', attrs={"tyc-event-ch": "CompanySearch.Company"})

        def while_req(url):
            # re-fetch the same page
            sub_is_ok, sub_search_resp = api_get(url=url,
                                                 headers=self.headers,
                                                 data={}, resptype='text')
            return sub_is_ok, sub_search_resp

        # manual anti-bot verification: loop until anchors appear again
        if len(tags) == 0:
            while 1:
                if is_ok and len(tags) > 0:
                    break
                else:
                    LOG.critical('验证############### %s ###############' % url)
                    random_sleep(20, 25)
                    is_ok, search_resp = while_req(url)
                    soup = BeautifulSoup(search_resp, 'lxml')
                    tags = soup.find_all(
                        'a',
                        attrs={"tyc-event-ch": "CompanySearch.Company"})
        for tag in tags:
            if not tag or not tag.attrs.get('href'):
                continue
            res_dict = dict()
            # pro mode links are relative and point at the background tab
            if API_MODE == 'tyc':
                tyc_url = tag.get('href').strip()
            elif API_MODE == 'pro':
                tyc_url = '%s%s/background' % (TYC_PRO_DETAIL_API,
                                               tag.get('href').strip())
            else:
                tyc_url = ''
            res_dict['tyc_url'] = tyc_url
            res_dict['name'] = tag.get_text().strip()
            res_dict['key'] = key
            res_dict['is_send_email'] = False
            res_dict['city'] = city_info.get(
                'full_name') if city_info else '-'
            res_dict['sub_city'] = sub_city_info.get(
                'full_name') if sub_city_info else '-'
            detail_res = list()
            if API_MODE == 'tyc':
                detail_res = self.detail_by_url(res_dict.get('tyc_url'))
            elif API_MODE == 'pro':
                detail_res = self.detail_pro_by_url(
                    res_dict.get('tyc_url'))
            res_dict.update(detail_res)
            print('%s[%s] %s' % (res_dict['name'],
                                 str(True if res_dict else False),
                                 res_dict['tyc_url']))
            ret_res.append(res_dict)
            if queue:
                queue.put(res_dict)
            random_sleep(3.5, 4.5)
            if IS_TEST_BREAK:
                break
        if IS_TEST_BREAK:
            break
    return ret_res
def work_by_key(self, key, min_page=0, max_page=5, queue=None):
    """Crawl pages [min_page, max_page) for *key* and return scraped records.

    :param min_page / max_page: page window; 0/None fall back to the
           instance defaults MIN_PAGE / MAX_PAGE
    :param queue: optional multiprocessing queue; each record is also put here
    :return: list of record dicts (tyc_url, name, key + detail fields)
    """
    ret_res = list()
    if not key:
        LOG.error("【%s】key is null, no work." % RUN_MODE)
        return ret_res
    # NOTE(review): `if not min_page` also treats a legitimate page 0 as
    # "unset" — confirm MIN_PAGE semantics with callers
    if not min_page:
        min_page = self.MIN_PAGE
    if not max_page:
        max_page = self.MAX_PAGE
    LOG.info('%s[%s ~ %s]' % (key, min_page, max_page))
    # page
    for page in range(min_page, max_page, 1):
        # two API backends share the same path shape
        if API_MODE == 'tyc':
            url = '%s/p%s?key=%s' % (TYC_SEARCH_API, page,
                                     parse.quote(key))
        elif API_MODE == 'pro':
            url = '%s/p%s?key=%s' % (TYC_PRO_SEARCH_API, page,
                                     parse.quote(key))
        else:
            LOG.critical('====== API_MODE is not in [tyc, pro] ======')
            sys.exit(1)
        LOG.info('%s[%s]%s' % (key, API_MODE, url))
        is_ok, search_resp = api_get(url=url, headers=self.headers,
                                     data={}, resptype='text')
        if not is_ok:
            continue
        soup = BeautifulSoup(search_resp, 'lxml')
        tags = soup.find_all(
            'a', attrs={"tyc-event-ch": "CompanySearch.Company"})

        def while_req(url):
            # re-fetch the same page
            sub_is_ok, sub_search_resp = api_get(url=url,
                                                 headers=self.headers,
                                                 data={}, resptype='text')
            return sub_is_ok, sub_search_resp

        # manual anti-bot verification: loop until anchors appear again
        if len(tags) == 0:
            while 1:
                if is_ok and len(tags) > 0:
                    break
                else:
                    LOG.critical('验证############### %s ###############' % url)
                    random_sleep(20, 25)
                    is_ok, search_resp = while_req(url)
                    soup = BeautifulSoup(search_resp, 'lxml')
                    tags = soup.find_all(
                        'a',
                        attrs={"tyc-event-ch": "CompanySearch.Company"})
        for tag in tags:
            if not tag or not tag.attrs.get('href'):
                continue
            res_dict = dict()
            # pro mode links are relative and point at the background tab
            if API_MODE == 'tyc':
                tyc_url = tag.get('href').strip()
            elif API_MODE == 'pro':
                tyc_url = '%s%s/background' % (TYC_PRO_DETAIL_API,
                                               tag.get('href').strip())
            else:
                tyc_url = ''
            res_dict['tyc_url'] = tyc_url
            res_dict['name'] = tag.get_text().strip()
            res_dict['key'] = key
            detail_res = list()
            if API_MODE == 'tyc':
                detail_res = self.detail_by_url(res_dict.get('tyc_url'))
            elif API_MODE == 'pro':
                detail_res = self.detail_pro_by_url(
                    res_dict.get('tyc_url'))
            res_dict.update(detail_res)
            print('%s[%s] %s' % (res_dict['name'],
                                 str(True if res_dict else False),
                                 res_dict['tyc_url']))
            ret_res.append(res_dict)
            if queue:
                queue.put(res_dict)
            random_sleep(3.2, 4.5)
            if IS_TEST_BREAK:
                break
        if IS_TEST_BREAK:
            break
    return ret_res
def not_found_error(error):
    """Log the missing URL and render the 404 error page."""
    LOG.error("%s is not found 404" % request.url)
    return render_template('errors/404.html'), 404
def server_error(error):
    """Log the failing URL and render the 500 error page."""
    LOG.error("%s is server error 500" % request.url)
    return render_template('errors/500.html'), 500