def save_predict_history():
    """Store today's prediction results on today's ``nCoV_dxy`` record.

    Fetches the predictions with ``predict()``, discards the ``'world'``
    entry, regroups the remaining per-area results by their ``'date'`` value
    and, for the date equal to the module-level ``today``, writes the
    ``{area: result}`` mapping into the ``forcast_result`` field of the
    document whose ``date`` is ``today`` formatted as ``'%m.%d'``.

    :return: None
    """
    logger.info('开始保存预测记录')
    predictions = predict()
    predictions.pop('world')

    # Tag every result with the area it belongs to and bucket it by date.
    results_by_date = {}
    for area_name, area_results in predictions.items():
        for result in area_results:
            result['area'] = area_name
            results_by_date.setdefault(result['date'], []).append(result)

    # Convert each date bucket into an {area: result} mapping; the temporary
    # 'area' tag is popped off the result again while doing so.
    areas_by_date = {}
    for day, day_results in results_by_date.items():
        areas_by_date[day] = {
            result.pop('area'): result for result in day_results
        }

    for day, by_area in areas_by_date.items():
        if day == today:
            # Serialise with CJsonEncoder and parse back so that only plain
            # JSON data is handed to the database.
            encoded = json.dumps(by_area, ensure_ascii=False, cls=CJsonEncoder)
            nCoV_dxy.update_one(
                {'date': today.strftime('%m.%d')},
                {'$set': {'forcast_result': json.loads(encoded)}},
            )
            logger.info('保存历史预测完成')
            break
def ncov_task3():
    """Scheduled job: save the current day's prediction data.

    Prints and logs a timestamped ``save_predict_history`` line, then calls
    ``tasks.save_predict_history()``.

    NOTE(review): the original note on this function said it runs at 23:58,
    but ``dojob`` registers it with a cron trigger at 18:30 -- confirm which
    time is intended.

    :return: None
    """
    job_label = 'save_predict_history'
    stamp_format = '%Y-%m-%d %H:%M:%S'
    print(f'[{datetime.datetime.now().strftime(stamp_format)}] {job_label}')
    logger.info(
        f'[{datetime.datetime.now().strftime(stamp_format)}] {job_label}')
    tasks.save_predict_history()
def ncov_task2():
    """Scheduled job (09:58): crawl the DXY epidemic data.

    Prints and logs a timestamped ``crawl_dxy`` line, then calls
    ``tasks.crawl_dxy()``.

    :return: None
    """
    job_label = 'crawl_dxy'
    stamp_format = '%Y-%m-%d %H:%M:%S'
    print(f'[{datetime.datetime.now().strftime(stamp_format)}] {job_label}')
    logger.info(
        f'[{datetime.datetime.now().strftime(stamp_format)}] {job_label}')
    tasks.crawl_dxy()
def ncov_task1():
    """Scheduled job: periodically refresh the DXY nationwide data.

    Prints and logs a timestamped ``update_overall`` line, then calls
    ``tasks.update_overall_timed()``.

    :return: None
    """
    job_label = 'update_overall'
    stamp_format = '%Y-%m-%d %H:%M:%S'
    print(f'[{datetime.datetime.now().strftime(stamp_format)}] {job_label}')
    logger.info(
        f'[{datetime.datetime.now().strftime(stamp_format)}] {job_label}')
    tasks.update_overall_timed()
def save_predict_history_remedial():
    """Backfill ``forcast_result`` for past days whose save failed.

    Downloads the prediction data from the AMiner prediction API, discards
    the ``'world'`` entry and regroups the per-area results by date.  For
    every date up to and including yesterday (relative to the module-level
    ``today``) whose ``nCoV_dxy`` record has no ``forcast_result`` field yet,
    the ``{area: result}`` mapping of that date is written to the record.

    :raises requests.RequestException: if the request fails, times out or
        the server answers with an HTTP error status.
    :return: None
    """
    import requests

    # Without a timeout requests waits indefinitely on a stalled server, and
    # without the status check an error page would only surface later as a
    # confusing JSON/KeyError failure.
    res = requests.get(
        'https://innovaapi.aminer.cn/covid/api/v1/pneumonia/prediction',
        timeout=30)
    res.raise_for_status()

    logger.info('开始保存预测记录')
    data = res.json()['data']
    data.pop('world')

    # Tag every result with its area and bucket the results by date.
    dic = {}
    for area, results in data.items():
        for r in results:
            r['area'] = area
            dic.setdefault(r['date'], []).append(r)

    # Convert each date bucket into an {area: result} mapping; the temporary
    # 'area' tag is removed from the result again.
    insert_data = {}
    for date, ret in dic.items():
        items = {}
        for i in ret:
            area = i.pop('area')
            items[area] = i
        insert_data[date] = items

    insert_time = today + timedelta(days=-1)
    insert_time_str = insert_time.strftime('%Y-%m-%d')
    for k, v in insert_data.items():
        # Dates are 'YYYY-MM-DD' strings, so string comparison orders them
        # chronologically; only yesterday and earlier are backfilled.
        if k <= insert_time_str:
            # Records are keyed by 'MM.DD', i.e. the date without its year.
            record_date = '.'.join(k.split('-')[1:])
            if nCoV_dxy.find_one({
                    'date': record_date,
                    'forcast_result': {
                        '$exists': False
                    }
            }):
                print(k)
                forcast_result = json.dumps(v,
                                            ensure_ascii=False,
                                            cls=CJsonEncoder)
                print(forcast_result)
                nCoV_dxy.update_one(
                    {'date': record_date},
                    {'$set': {
                        'forcast_result': json.loads(forcast_result)
                    }})
    logger.info('保存历史预测完成')
def dojob():
    """Create the blocking scheduler, register the nCoV jobs and start it.

    Registered jobs:

    * ``update_overall`` -- ``ncov_task1`` every 60 minutes, first run at
      minute 1 of the next hour.
    * ``crawl_dxy`` -- ``ncov_task2`` daily at 09:58.
    * ``save_predict`` -- ``ncov_task3`` daily at 18:30.

    ``scheduler.start()`` is called on a ``BlockingScheduler``, so this
    function does not return while the scheduler is running.

    :return: None
    """
    scheduler = BlockingScheduler()

    # First hourly refresh: minute 1 of the next hour.  The hour is advanced
    # with timedelta so the date rolls over correctly; the previous
    # ``now.replace(hour=now.hour + 1)`` raised ValueError (hour 24) whenever
    # the process was started between 23:00 and 23:59.
    now = datetime.datetime.now()
    new_time = (now + datetime.timedelta(hours=1)).replace(minute=1)
    scheduler.add_job(ncov_task1,
                      'interval',
                      minutes=60,
                      id='update_overall',
                      next_run_time=new_time)

    # Daily cron jobs.
    scheduler.add_job(ncov_task2, 'cron', hour=9, minute=58, id='crawl_dxy')
    scheduler.add_job(ncov_task3,
                      'cron',
                      hour=18,
                      minute=30,
                      id='save_predict')

    # NOTE: the e-mail listener is currently not registered.  To enable it:
    #   scheduler.add_listener(my_listener,
    #                          EVENT_JOB_EXECUTED | EVENT_JOB_ERROR)
    logger.info('scheduler start ')
    scheduler.start()
def my_listener(event):
    """Scheduler event listener: log the job outcome and send e-mail notices.

    On failure the job id and the exception are logged and an error e-mail
    is sent.  On success a log line is written, and a success e-mail is sent
    only for the ``crawl_dxy`` and ``save_predict`` jobs.

    :param event: job event exposing ``job_id`` and ``exception``.
    :return: None
    """

    def _notify(body, subject):
        # Build a mailer from the module-level mail settings and send.
        mailer = EmailTools(sender_name, sender_url, sender_pwd,
                            receiver_name, receiver_urls, smtp)
        mailer.send_email(body, subject)

    if event.exception:
        logger.info(event.job_id)
        logger.info(event.exception)
        _notify(f'{event.job_id}: \n{event.exception}', '定时任务错误')
        return

    logger.info(f'{event.job_id}: 正常进行')
    if event.job_id in ('crawl_dxy', 'save_predict'):
        _notify(f'{event.job_id}: \n执行成功!!!', '定时任务成功')
def crawl_dxy():
    """Crawl the DXY data and store today's record with daily increments.

    The crawler is tried up to 20 times, sleeping 3 seconds after each
    failed attempt.  When both ``overall`` and ``area`` come back non-empty,
    the ``*AddCount`` fields of every province and city found in yesterday's
    record are set to today's count minus yesterday's count.  The assembled
    record is then written to ``nCoV_dxy``: updated (without ``overall_new``)
    when a record for today already exists, inserted otherwise.

    The first entry of ``area`` is stored under the ``hubei_*`` fields and
    the remaining entries are summed into the ``nohubei_*`` fields --
    presumably the crawler returns Hubei first; verify against ``Crawler``.

    :return: None
    """
    logger.info("抓取丁香园")
    yesterday = (date.today() + timedelta(days=-1)).strftime('%m.%d')
    crawler = Crawler()

    # Retry until the crawler yields a non-empty overall summary.
    overall, area = {}, {}
    for _attempt in range(20):
        overall, area = crawler.crawler(crawl=True)
        if overall:
            logger.info('抓取成功')
            break
        time.sleep(3)
        logger.error('失败重试')

    yesterday_data = nCoV_dxy.find_one({'date': yesterday})
    if overall and area:
        # Index yesterday's provinces by name and cities by "province_city".
        prev_provinces = {}
        prev_cities = {}
        if yesterday_data:
            for prev_province in yesterday_data['area']:
                prev_name = prev_province['provinceName']
                prev_provinces[prev_name] = prev_province
                for prev_city in prev_province['cities']:
                    prev_cities[f"{prev_name}_{prev_city['cityName']}"] = \
                        prev_city

        if prev_provinces and prev_cities:
            # (increment field, cumulative field) pairs, in update order.
            count_fields = (
                ('confirmedAddCount', 'confirmedCount'),
                ('suspectedAddCount', 'suspectedCount'),
                ('curedAddCount', 'curedCount'),
                ('deadAddCount', 'deadCount'),
            )
            for province in area:
                province_name = province['provinceName']
                if province_name in prev_provinces:
                    baseline = prev_provinces[province_name]
                    for add_key, total_key in count_fields:
                        province[add_key] = (province[total_key] -
                                             baseline[total_key])
                for city in province['cities']:
                    city_key = f"{province_name}_{city['cityName']}"
                    if city_key in prev_cities:
                        city_baseline = prev_cities[city_key]
                        for add_key, total_key in count_fields:
                            city[add_key] = (city[total_key] -
                                             city_baseline[total_key])

        first_entry, other_entries = area[0], area[1:]
        hubei_confirm = first_entry['confirmedCount']
        nohubei_confirm = sum(p['confirmedCount'] for p in other_entries)
        nohubei_cure_count = sum(p['curedCount'] for p in other_entries)
        nohubei_cure_incr = sum(p['curedAddCount'] for p in other_entries)

        record = {
            'overall': overall,
            'overall_new': overall,
            'area': area,
            'hubei_confirm': hubei_confirm,
            'nohubei_confirm': nohubei_confirm,
            'nohubei_cureCount': nohubei_cure_count,
            'nohubei_cureIncr': nohubei_cure_incr,
            'hubei_cureCount': first_entry['curedCount'],
            'hubei_cureIncr': first_entry['curedAddCount'],
            'date': datetime.now().strftime('%m.%d'),
            'save_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'source': 'dxy',
        }

        if nCoV_dxy.find_one({'date': datetime.now().strftime('%m.%d')}):
            # An existing record keeps its own 'overall_new' value.
            record.pop('overall_new', None)
            nCoV_dxy.update_one({'date': datetime.now().strftime('%m.%d')},
                                {'$set': record})
        else:
            nCoV_dxy.insert_one(record)
    logger.info("丁香园抓取完成")
def update_overall_timed():
    """Refresh the ``overall_new`` / ``area_new`` fields of today's record.

    The crawler is tried up to 20 times, sleeping 3 seconds after each
    failed attempt.  When both ``overall`` and ``area`` come back non-empty:

    * if today's record exists, the ``*AddCount`` fields already stored in
      its ``area`` list (when present) are copied onto the freshly crawled
      provinces and cities, and the record's ``overall_new`` and
      ``area_new`` fields are updated;
    * otherwise a new record holding only ``date``, ``overall_new`` and
      ``area_new`` is inserted.

    :return: None
    """
    logger.info("更新实时数据")
    crawler = Crawler()

    # Retry until the crawler yields a non-empty overall summary.
    overall, area = {}, {}
    for _attempt in range(20):
        overall, area = crawler.crawler(crawl=True)
        if overall:
            logger.info('抓取成功')
            break
        time.sleep(3)
        logger.error('失败重试')

    today_key = datetime.now().strftime('%m.%d')
    t_data = nCoV_dxy.find_one({'date': today_key})

    if overall and area:
        if t_data:
            if 'area' in t_data:
                # Index the stored provinces by name and the stored cities
                # by "province_city".
                stored_provinces = {}
                stored_cities = {}
                for stored_province in t_data['area']:
                    stored_name = stored_province['provinceName']
                    stored_provinces[stored_name] = stored_province
                    for stored_city in stored_province['cities']:
                        stored_cities[
                            f"{stored_name}_{stored_city['cityName']}"] = \
                            stored_city

                if stored_provinces and stored_cities:
                    add_fields = ('confirmedAddCount', 'suspectedAddCount',
                                  'curedAddCount', 'deadAddCount')
                    for province in area:
                        province_name = province['provinceName']
                        if province_name in stored_provinces:
                            source_province = stored_provinces[province_name]
                            for field in add_fields:
                                province[field] = source_province[field]
                        for city in province['cities']:
                            city_key = f"{province_name}_{city['cityName']}"
                            if city_key in stored_cities:
                                source_city = stored_cities[city_key]
                                # Cities may lack a stored increment; fall
                                # back to an empty string in that case.
                                for field in add_fields:
                                    city[field] = source_city.get(field, '')

            nCoV_dxy.update_one(
                {'date': today_key},
                {'$set': {
                    'overall_new': overall,
                    'area_new': area
                }})
        else:
            nCoV_dxy.insert_one({
                'date': today_key,
                'overall_new': overall,
                'area_new': area
            })
    logger.info('实时数据更新完成')