def readnCoVFromTencent():
    """Fetch the latest epidemic snapshot from the Tencent API.

    Returns a dict with keys 'data' (converted record list) and
    'updateTime'.  On any failure, or when the remote data is not newer
    than the locally stored timestamp, returns an empty data list with a
    sentinel updateTime of 2020-01-01.
    """
    logger.info('开始抓取疫情数据')
    empty = {'data': [], 'updateTime': datetime(2020, 1, 1)}
    try:
        lastTime = getLastestUpdateTime()
        url = 'https://service-f9fjwngp-1252021671.bj.apigw.tencentcs.com/release/pneumonia'
        # url = 'http://lab.isaaclin.cn/nCoV/api/area?latest=0'
        payload = requests.get(url, timeout=10).json()['data']
        totalData = convertTotalData(payload['statistics'])
        # Nothing to do when the feed has not advanced past what we stored.
        if totalData.updateTime <= lastTime:
            logger.warning('数据未更新, ' + str(totalData.updateTime))
            return empty
        records = [totalData]
        records.extend(convertProvinceList(payload['listByArea'], totalData.updateTime))
        records.extend(convertOtherCountryList(payload['listByOther'], totalData.updateTime))
        return {'data': records, 'updateTime': totalData.updateTime}
    except Exception as e:
        logger.error('readnCoVFromTencent error, ' + str(e))
        return empty
def crawlarea():
    """Crawl area-level records from Tencent, geocode non-country entries
    via Baidu, and persist them.

    Returns True on success, False when nothing was fetched or the write
    failed.  Leftover commented-out debug counter/break code removed.
    """
    dataList = readnAreaFromTencent()
    if not dataList:
        logger.warning('没有采集到数据')
        return False
    try:
        logger.info('开始写入数据...')
        for item in dataList:
            # Country-level rows are not geocoded; everything else gets
            # Baidu coordinates so it can be plotted on a map.
            if item.level != 'country':
                pos = readPositionFromBaidu(item.name, item.parentName)
                if pos:
                    item.longitude = pos["lng"]
                    item.latitude = pos["lat"]
            db.session.add(item)
        # Commit once after the loop so the whole batch lands atomically.
        db.session.commit()
        logger.info('写入数据完成...')
        return True
    except BaseException as e:
        logger.error('抓取发生异常,' + str(e))
        # Reset the session so a failed run doesn't poison later writes.
        db.session.rollback()
        return False
def crawl():
    # Crawler entry point: run one crawl immediately, then re-run every
    # 15 minutes via the `schedule` library.
    try:
        do_crawl()
        schedule.every(15).minutes.do(do_crawl)
        # Polling loop: check pending jobs once a second.
        while True:
            schedule.run_pending()
            time.sleep(1)
    except BaseException as e:
        # BaseException so even KeyboardInterrupt/SystemExit is logged
        # before the loop exits.
        logger.error('主循环异常退出, '+ str(e))
def updateToDayCaches(datalogList):
    """Push every record in *datalogList* into the per-day cache table.

    Returns True when all records were written, False as soon as any
    write raises.
    """
    logger.info('开始更新到缓存表...')
    try:
        for entry in datalogList:
            updateOneDayCachesLog(entry)
    except BaseException as e:
        logger.error('写入缓存表发生异常' + str(e))
        return False
    else:
        logger.info('完成缓存表更新')
        return True
def readOverallDataFromIsaaclin():
    """Fetch the single latest overall (nationwide) record from Isaaclin.

    Returns the converted record on success, or an empty list on any
    failure — callers must handle both shapes.
    """
    logger.info('抓取全局疫情数据')
    try:
        url = 'https://lab.isaaclin.cn/nCoV/api/overall?latest=1'
        r = requests.get(url, timeout=10)
        dataList = r.json()['results']
        # Only the first (latest) result is converted.
        return convertOverallDataList(dataList[0])
    except Exception as e:
        # Fixed: error message previously named a non-existent function
        # ('readnOverallDataFromIsaaclin').
        logger.error('readOverallDataFromIsaaclin error,' + str(e))
        return []
def readProvinceDataFromIsaaclin(updateTime=None):
    """Fetch all historical per-province records from the Isaaclin API.

    updateTime: optional timestamp forwarded to convertProvinceList.
        Defaults to None so existing zero-argument callers keep working.
        Fixes the TypeError previously raised when readnCovFromIsasclin
        called this function with an argument.

    Returns the converted record list, or [] on any failure.
    """
    logger.info('开始抓取疫情数据')
    try:
        url = 'https://lab.isaaclin.cn/nCoV/api/area?latest=0'
        r = requests.get(url, timeout=10)
        dataList = r.json()['results']
        if updateTime is None:
            return convertProvinceList(dataList)
        return convertProvinceList(dataList, updateTime)
    except Exception as e:
        # Fixed: error message previously named a non-existent function
        # ('readnProvinceDataFromIsaaclin').
        logger.error('readProvinceDataFromIsaaclin error,' + str(e))
        return []
def readnAreaFromTencent():
    """Fetch province-level and foreign-country records from the Tencent
    pneumonia endpoint and return them as a single list ([] on failure).
    """
    logger.info('开始抓取区域数据')
    try:
        url = 'https://service-f9fjwngp-1252021671.bj.apigw.tencentcs.com/release/pneumonia'
        payload = requests.get(url, timeout=10).json()['data']
        provinces = convertProvinceList(payload['listByArea'])
        countries = convertOtherCountryList(payload['listByOther'])
        return [*provinces, *countries]
    except Exception as e:
        logger.error('readnAreaFromTencent error, ' + str(e))
        return []
def crawlprovincehistory():
    """One-shot import of historical province data from Isaaclin.

    Returns True on success, False when nothing was fetched or the write
    failed.
    """
    dataList = readProvinceDataFromIsaaclin()
    if not dataList:  # idiomatic emptiness check
        logger.warning('没有采集到数据')
        return False
    try:
        logger.info('开始写入数据...')
        for item in dataList:
            db.session.add(item)
        # Single commit keeps the import atomic: all history rows or none.
        db.session.commit()
        logger.info('写入数据完成...')
        return True
    except BaseException as e:
        logger.error('抓取发生异常, ' + str(e))
        # Reset the session so a failed import doesn't poison later writes.
        db.session.rollback()
        return False
def readnCovFromIsasclin():
    """Fetch the latest overall + per-province data from the Isaaclin API.

    Returns {'data': [...], 'updateTime': ...}.  When fetching fails, the
    overall fetch returns nothing, or the remote data is not newer than
    the stored timestamp, returns an empty data list with a sentinel
    updateTime of 2020-01-01.
    """
    logger.info('开始抓取疫情数据')
    result = {'data': [], 'updateTime': datetime(2020, 1, 1)}
    try:
        lastTime = getLastestUpdateTime()
        totalData = readOverallDataFromIsaaclin()
        if not totalData:
            # readOverallDataFromIsaaclin returns [] on failure; bail out
            # instead of raising AttributeError on .updateTime below.
            logger.warning('全局疫情数据抓取失败')
            return result
        if totalData.updateTime <= lastTime:
            logger.warning('数据未更新, ' + str(totalData.updateTime))
            return result
        dataList = [totalData]
        time.sleep(1)  # brief pause between the two API calls
        # NOTE(review): this call passes an argument, so
        # readProvinceDataFromIsaaclin must accept an updateTime parameter.
        dataList.extend(readProvinceDataFromIsaaclin(totalData.updateTime))
        return {'data': dataList, 'updateTime': totalData.updateTime}
    except Exception as e:
        # Fixed: this message previously named readnCoVFromTencent
        # (copy-paste error), making failures hard to attribute.
        logger.error('readnCovFromIsasclin error, ' + str(e))
        return result
def beforeRequest():
    """Flask before-request hook: record an ApiLog row for each API call.

    Skips paths with fewer than two segments and the /apilog endpoint
    itself (which would otherwise log its own reads).  Best-effort: any
    failure is logged and swallowed so the request still proceeds.
    """
    try:
        paths = request.path.split('/')
        if len(paths) <= 2:
            # Fixed typo in log message ('illigle' -> 'illegal').
            logger.info('beforeRequest, illegal path, %s', paths)
            return
        if paths[1] == 'apilog':
            # Short-circuit before building any log fields.
            return
        aLog = ApiLog(logTime=datetime.now(),
                      api=paths[1],
                      params=' '.join(paths[2:]),
                      remoteAddr=request.remote_addr)
        db.session.add(aLog)
        db.session.commit()
    except BaseException as e:
        logger.error('beforeRequest 异常, %s, %s', request, str(e))
        return
def do_crawl():
    """Run one crawl cycle: fetch the latest data (Isaaclin source),
    refresh the per-day cache table, persist all records, and advance the
    stored update timestamp.

    Returns True on success, False when nothing new was fetched or a step
    failed.
    """
    # Alternative source kept for reference: readnCoVFromTencent()
    data = readnCovFromIsasclin()
    if not data['data']:
        logger.warning('没有采集到数据')
        return False
    try:
        logger.info('开始写入数据...')
        # Cache table must be updated first; abort the write on failure.
        if not updateToDayCaches(data['data']):
            return False
        for item in data['data']:
            db.session.add(item)
        # Commit once so the batch and the timestamp stay consistent.
        db.session.commit()
        updateUpdateTime(data['updateTime'])
        logger.info('写入数据完成...')
        return True
    except BaseException as e:
        logger.error('抓取发生异常' + str(e))
        # Reset the session so a failed run doesn't poison later writes.
        db.session.rollback()
        return False
def readPositionFromBaidu(name, parent):
    """Geocode *name* (within city *parent*) via the Baidu geocoding API.

    Returns the {'lng': ..., 'lat': ...} location dict, or None when the
    lookup fails or the match is not at country/province/city/district
    granularity.
    """
    import os

    logger.info('读取%s %s经纬度', parent, name)
    try:
        # Read the API key from the environment instead of hard-coding a
        # placeholder; falls back to the original '<your key>' so behavior
        # is unchanged when the variable is unset.
        ak = os.environ.get('BAIDU_MAP_AK', '<your key>')
        url = f'http://api.map.baidu.com/geocoding/v3/?address={name}&city={parent}&output=json&ak={ak}'
        r = requests.get(url, timeout=10)
        data = r.json()
        if data["status"] != 0:
            logger.error('读取%s %s经纬度信息失败, %s', parent, name, data)
            return None
        data = data['result']
        # Reject street-/POI-level matches: only coarse administrative
        # divisions are trusted for map plotting.
        if data["level"] not in ["国家", "省份", "城市", "区县"]:
            logger.error('读取%s经纬度信息失败,返回数据级别不对, level=%s', name, data["level"])
            return None
        return data["location"]
    except Exception as e:
        logger.error('readPositionFromBaidu error, %s, %s,', name, str(e))
        return None