def new_enterprise_main():
    """Crawl every generated page URL and hand each one to ``handle_page``.

    The module-level credentials (``device_id``, ``tim``, ``sign``,
    ``header``) are re-fetched through ``more_get_token()`` before the crawl
    starts, and a connection is obtained with ``connect_redis(0, 110)``.

    :return: None
    """
    global device_id, tim, sign, header

    fresh_credentials = more_get_token()
    device_id, tim, sign, header = fresh_credentials

    redis_client = connect_redis(0, 110)

    # NOTE(review): layout reconstructed from whitespace-collapsed source;
    # the pause is assumed to apply after every page, not once at the end.
    for page in creat_url():
        page_url, page_city, page_province = page
        handle_page(page_url, page_city, page_province, redis_client)
        sleep(1.5)
def handle_page(url, city, province, r):
    """Fetch one listing page and persist the enterprises dated yesterday.

    The page is requested with the module-level ``header``. While the text
    form of the decoded response does not contain ``'200'``, the credentials
    are refreshed through ``more_get_token()``, the old ``sign``/``tim``
    values embedded in ``url`` are swapped for the new ones, and the request
    is retried every 10 seconds.

    Every record whose ``StartDate`` equals ``get_yesterday()`` is tagged
    with ``City``/``Province``, stripped of ``ImageUrl`` and ``HitReason``,
    stored through ``r.set`` under ``"<province>:<city>:<KeyNo>"`` and
    collected; the collected records are then passed to ``write_data`` on a
    background thread together with the file name
    ``"<yesterday>-data.txt"``.

    The module-level ``is_break`` flag is set to ``True`` when the page holds
    no records or holds a record that is not dated yesterday.

    :param url: page URL to request
    :param city: value stored in each record's ``City`` field
    :param province: value stored in each record's ``Province`` field
    :param r: client exposing ``set(key, value)``
        (presumably the redis connection -- confirm against the caller)
    :return: None
    """
    global device_id, tim, sign, header, is_break

    # Same timeout as the retry request so the first call cannot hang forever.
    res = requests.get(url, headers=header, timeout=10)
    # The body is JSON. It used to be run through eval() after rewriting
    # true/false/null, which executes text received from the network and
    # mangles any string value containing those words; decode it as JSON.
    res_data = res.json()

    # NOTE(review): success is detected by a plain substring match on the
    # decoded payload, exactly as before; a dedicated status field would be
    # more reliable if the response schema provides one.
    while '200' not in str(res_data):
        sleep(10)
        old_sign, old_tim = sign, tim
        print('handle_page', res.text)
        print('权限不足或者accessToken失效,sign失败')
        device_id, tim, sign, header = more_get_token()
        # Re-sign the URL with the fresh credentials. The previous chain
        # contained a bare ``.replace()`` call, which raises TypeError.
        # An empty old value is skipped: str.replace('', x) would insert x
        # between every character of the URL.
        if old_sign:
            url = url.replace(old_sign, sign)
        if old_tim:
            url = url.replace(old_tim, tim)
        res = requests.get(url, headers=header, timeout=10)
        res_data = res.json()
    res.close()

    try:
        qiye_data = res_data.get('result').get('Result')
    except AttributeError as e:
        # Payload is not a mapping, or 'result' is missing / null.
        print(e)
        return

    if not qiye_data:
        is_break = True
        return

    yesterday = get_yesterday()
    write_list = []
    for qiye in qiye_data:
        if qiye.get('StartDate') != yesterday:
            is_break = True
            continue
        qiye['City'] = city
        qiye['Province'] = province
        # pop() instead of del: a record lacking one of these fields must
        # not abort the whole page with KeyError.
        qiye.pop('ImageUrl', None)
        qiye.pop('HitReason', None)
        write_list.append(qiye)
        r.set(province + ":" + city + ':' + qiye.get('KeyNo'), str(qiye))

    threading.Thread(
        target=write_data,
        args=(yesterday + "-data.txt", write_list),
    ).start()
def handle_page(url, city, province, r):
    """Fetch one listing page and store the enterprises dated yesterday.

    The page is requested with the module-level ``header``. While the text
    form of the decoded response does not contain ``'200'``, the credentials
    are refreshed through ``more_get_token()`` and the request is retried
    every 10 seconds.

    Every record whose ``StartDate`` equals ``get_yesterday()`` is tagged
    with ``City``/``Province`` and stored through ``r.set`` under
    ``"<province><city>:<KeyNo>"``.

    :param url: page URL to request
    :param city: value stored in each record's ``City`` field
    :param province: value stored in each record's ``Province`` field
    :param r: client exposing ``set(key, value)``
        (presumably the redis connection -- confirm against the caller)
    :return: None
    """
    global device_id, tim, sign, header

    # Same timeout as the retry request so the first call cannot hang forever.
    res = requests.get(url, headers=header, timeout=10)
    print(city, province)
    # The body is JSON. It used to be run through eval() after rewriting
    # true/false/null, which executes text received from the network and
    # mangles any string value containing those words; decode it as JSON.
    res_data = res.json()

    # NOTE(review): success is detected by a plain substring match on the
    # decoded payload, exactly as before.
    while '200' not in str(res_data):
        sleep(10)
        print('handle_page', res.text)
        print('权限不足或者accessToken失效,sign失败')
        device_id, tim, sign, header = more_get_token()
        # NOTE(review): the same ``url`` is requested again with only the
        # headers refreshed. If the URL embeds the old sign/timestamp it
        # presumably needs re-signing too -- confirm.
        res = requests.get(url, headers=header, timeout=10)
        res_data = res.json()
    res.close()

    try:
        qiye_data = res_data.get('result').get('Result')
    except AttributeError as e:
        # Payload is not a mapping, or 'result' is missing / null.
        print(e)
        return

    # A missing or null 'Result' used to reach the loop and raise TypeError.
    if not qiye_data:
        return

    yesterday = get_yesterday()
    for qiye in qiye_data:
        if qiye.get('StartDate') != yesterday:
            continue
        qiye['City'] = city
        qiye['Province'] = province
        r.set(province + city + ':' + qiye.get('KeyNo'), str(qiye))
@Time : 2019-06-18 17:57
@Author : Xincheng.Zhao
@Desc : Fetch the enterprises newly added each day
@Email : [email protected]
@File : getnewdata.py
"""
import threading
from multiprocessing import Process
from time import sleep

import requests
from apscheduler.schedulers.blocking import BlockingScheduler

from common.utils import connect_redis, get_yesterday, read_data, more_get_token, write_data

# Request credentials kept at module level; fetched once when the module is
# imported and rebound later by functions that declare them ``global``.
device_id, tim, sign, header = more_get_token()
# Module-level flag, initially off.
# NOTE(review): presumably a "stop paging" signal -- confirm against the code
# that reads it.
is_break = False


def new_enterprise_main():
    """Crawl every generated page URL and hand each one to ``handle_page``.

    Refreshes the module-level credentials through ``more_get_token()``,
    obtains a connection with ``connect_redis(0, 110)``, then calls
    ``handle_page`` for each ``(url, city, province)`` produced by
    ``creat_url()``.

    :return: None
    """
    global device_id, tim, sign, header
    device_id, tim, sign, header = more_get_token()
    r = connect_redis(0, 110)
    # NOTE(review): layout reconstructed from whitespace-collapsed source;
    # the pause is assumed to apply after every page, not once at the end.
    for url, city, province in creat_url():
        handle_page(url, city, province, r)
        sleep(1.5)