def allTyphoonCrawler():
    """Store every typhoon record in PostgreSQL, then re-arm a 180-second timer.

    Fetches the 'TF_09' listing to learn which typhoon ids are current, then
    fetches the 'TF_01' listing and hands one row per typhoon to
    ``PostgreCommand.allTyphoonInsertData``. Each row carries the id, Chinese
    and English names, year, the local time of the crawl, and a status flag
    ('1' when the id appears in the 'TF_09' listing, '0' otherwise).

    The database connection is closed even when a fetch or an insert raises.
    As before, the timer is only re-armed after a successful run; an
    exception propagates to the caller and the follow-up run is not scheduled.
    """
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        # A set keeps the per-row "is this typhoon current?" lookup O(1).
        currentIds = {item['TYPHOON_ID'] for item in get_html(get_url('TF_09'))}
        for dataItem in get_html(get_url('TF_01')):
            typhoonId = dataItem['TYPHOON_ID']
            # Build a fresh dict per row so no state leaks between inserts.
            dataResult = {
                'typhoon_id': typhoonId,
                'chn_name': dataItem['CHN_NAME'],
                'eng_name': dataItem['ENG_NAME'],
                'typhoon_year': dataItem['TYPHOON_YEAR'],
                'update_time': datetime.datetime.now().strftime(
                    '%Y-%m-%d %H:%M:%S'),
                # Key spelling 'typhon_status' is kept as-is: it is the name
                # handed to allTyphoonInsertData.
                'typhon_status': '1' if typhoonId in currentIds else '0',
            }
            postgreCommand.allTyphoonInsertData(dataResult)
    finally:
        postgreCommand.closePostgre()
    # Schedule the next crawl in 3 minutes.
    timrFor = Timer(1 * 1 * 3 * 60, allTyphoonCrawler)
    timrFor.start()
def sckjCloudCrawler():
    """Download new cloud images into ./cloud_img/ and record them in PostgreSQL.

    For every entry returned by ``get_html()`` whose ``name`` is not already
    present in ``./cloud_img/``, the image at ``url`` is downloaded and one row
    is passed to ``PostgreCommand.sckjCloudInsertData``. The release time is
    parsed from the file name (``YYYYmmddHHMM`` once ``.png`` is removed).
    Entries whose file already exists are skipped with a console message.

    The database connection is closed even when a download, a file-name parse
    or an insert raises.
    """
    imageDir = './cloud_img/'
    dataJson = get_html()
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        # List the directory once (only when there is something to process)
        # and keep the set current, instead of re-listing it for every entry.
        knownImages = set(os.listdir(imageDir)) if dataJson else set()
        for item in dataJson:
            imageName = item['name']
            if imageName in knownImages:
                print('图片数据已存在')
                continue
            urlretrieve(item['url'], imageDir + imageName)
            knownImages.add(imageName)

            update_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            timeDate = datetime.datetime.strptime(
                imageName.replace('.png', ''), '%Y%m%d%H%M')
            release_time = timeDate.strftime("%Y-%m-%d %H:%M:%S")

            result = {
                'c_name': imageName,
                'c_url': item['url'],
                # c_bs carries the file name, not the image bytes.
                'c_bs': imageName,
                'md5': item['md5'],
                'dt': item['dt'],
                'update_time': update_time,
                'release_time': release_time,
            }
            postgreCommand.sckjCloudInsertData(result)
    finally:
        postgreCommand.closePostgre()
def previewTyphoonCrawler_copy():
    """Fetch typhoon forecast records for every request URL and store them.

    Each URL yielded by ``get_url()`` is fetched with ``get_html``; every
    record in the response is copied into a dict whose keys are the
    lower-cased source field names and passed to
    ``PostgreCommand.previewInsertData_copy``.

    The database connection is closed even when a fetch or an insert raises.
    """
    # Source field names; the stored key is the lower-cased form of each one
    # (spellings such as FORCAST/CONTER are kept exactly as the feed uses them).
    fieldNames = (
        'PID', 'TYPHOON_ID', 'FORCAST_COUNTRY_NAME', 'YBSJ', 'RQSJ', 'JD',
        'WD', 'CONTER_PA', 'CENTER_WIND', 'SEVEN_WIND', 'TEN_WIND',
        'MOVE_DIRECT', 'DEPICT', 'TID', 'LINE_COLOR',
    )
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        for requestUrl in get_url():
            for dataItem in get_html(requestUrl):
                # Fresh dict per record so no values carry over between rows.
                dataResult = {name.lower(): dataItem[name]
                              for name in fieldNames}
                postgreCommand.previewInsertData_copy(dataResult)
    finally:
        postgreCommand.closePostgre()
def allPreviewTyphoonCrawler():
    """Replace stored typhoon forecasts with fresh data, then re-arm the timer.

    Each entry yielded by ``get_url()`` provides ``requestUrl``, ``args`` and
    ``address``. When the fetched response contains records, the rows
    previously stored for that ``args``/``address`` pair are removed with
    ``deletPreviewInsertData`` and every fetched record is inserted with
    ``allPreviewInsertData``. An empty or missing response leaves the stored
    rows untouched.

    The database connection is closed even when a fetch, delete or insert
    raises. As before, the 180-second timer is only re-armed after a
    successful run.
    """
    # Source field names; the stored key is the lower-cased form of each one
    # (spellings such as FORCAST/CONTER are kept exactly as the feed uses them).
    fieldNames = (
        'PID', 'TYPHOON_ID', 'FORCAST_COUNTRY_NAME', 'YBSJ', 'RQSJ', 'JD',
        'WD', 'CONTER_PA', 'CENTER_WIND', 'SEVEN_WIND', 'TEN_WIND',
        'MOVE_DIRECT', 'DEPICT', 'TID', 'LINE_COLOR',
    )
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        for item in get_url():
            dataJsonList = get_html(item['requestUrl'])
            # Skip falsy responses (empty list, None) *before* deleting, so a
            # failed fetch can never wipe the rows that are already stored.
            if not dataJsonList:
                continue
            postgreCommand.deletPreviewInsertData(item['args'], item['address'])
            for dataItem in dataJsonList:
                # Fresh dict per record so no values carry over between rows.
                dataResult = {name.lower(): dataItem[name]
                              for name in fieldNames}
                postgreCommand.allPreviewInsertData(dataResult)
    finally:
        postgreCommand.closePostgre()
    # Schedule the next crawl in 3 minutes.
    timrFor = Timer(1 * 1 * 3 * 60, allPreviewTyphoonCrawler)
    timrFor.start()
def windFarmCrawler():
    """Fetch wind-farm data with ``get_html()`` and store it in PostgreSQL.

    The fetched payload is passed unchanged to
    ``PostgreCommand.windFarmInsertData``. The database connection is closed
    even when the fetch or the insert raises.
    """
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        postgreCommand.windFarmInsertData(get_html())
    finally:
        postgreCommand.closePostgre()


# windFarmCrawler()
def collapse_ZH005():
    """Run ``infos_paser`` on the hard-coded search.sina.com.cn news search URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        url = 'https://search.sina.com.cn/?q=%C9%BD%CC%E5%B1%C0%CB%FA&range=title&c=news&sort=time'
        infos_paser(url)
    except Exception as e:
        # The message previously named collapse_ZH002 (copy-paste slip); it
        # now identifies this crawler so failures can be traced correctly.
        print("collapse_ZH005访问网站失败", str(e))
    postgreCommand.closePostgre()
def volcanic_ZH001():
    """Run ``infos_paser`` on the hard-coded www.tianqi.com tag-page URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'http://www.tianqi.com/tag/%BB%F0%C9%BD%B1%AC%B7%A2/'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("volcanic_ZH001访问网站失败", str(err))
    postgreCommand.closePostgre()
def debrisFlow_ZH004():
    """Run ``infos_paser`` on the hard-coded search.sina.com.cn news search URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'https://search.sina.com.cn/?q=%C4%E0%CA%AF%C1%F7%D4%D6%BA%A6&range=title&c=news&sort=time'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("debrisFlow_ZH004访问网站失败", str(err))
    postgreCommand.closePostgre()
def typhoon_ZH004():
    """Run ``infos_paser`` on the hard-coded www.tianqi.com tag-page URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'http://www.tianqi.com/tag/%CC%A8%B7%E7%D7%EE%D0%C2%CF%FB%CF%A2/'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("typhoon_ZH004访问网站失败", str(err))
    postgreCommand.closePostgre()
def rainstorm_ZH001():
    """Run ``infos_paser`` on the hard-coded www.cibeicn.com topic-list URL (page 1).

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'http://www.cibeicn.com/topic/list.aspx?key=%E6%9A%B4%E9%9B%A8&pageIndex=1'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("rainstorm_ZH001访问网站失败", str(err))
    postgreCommand.closePostgre()
def rainstorm_ZH002():
    """Run ``infos_paser`` on the hard-coded www.qxkp.net ``/zhfy/byhl/`` URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'http://www.qxkp.net/zhfy/byhl/'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("rainstorm_ZH002访问网站失败", str(err))
    postgreCommand.closePostgre()
def drought_ZH002():
    """Run ``infos_paser`` on the hard-coded www.qxkp.net ``/zhfy/ghzh/`` URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'http://www.qxkp.net/zhfy/ghzh/'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("drought_ZH002访问网站失败", str(err))
    postgreCommand.closePostgre()
def landslide_ZH001():
    """Run ``infos_paser`` on the hard-coded www.cibeicn.com topic-list URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'http://www.cibeicn.com/topic/list.aspx?key=%E6%BB%91%E5%9D%A1'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("landslide_ZH001访问网站失败", str(err))
    postgreCommand.closePostgre()
def drought_ZH001():
    """Run ``infos_paser`` on the hard-coded search.cctv.com web-search URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'https://search.cctv.com/search.php?qtext=%E5%B9%B2%E6%97%B1&type=web'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("drought_ZH001访问网站失败", str(err))
    postgreCommand.closePostgre()
def typhoon_ZH002():
    """Run ``infos_paser`` on the hard-coded search.sina.com.cn news search URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'https://search.sina.com.cn/?q=%CC%A8%B7%E7%D4%D6%BA%A6&range=title&c=news&sort=time'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("typhoon_ZH002访问网站失败", str(err))
    postgreCommand.closePostgre()
def typhoon_ZH001():
    """Call ``get_html`` on the hard-coded typhoon.nmc.cn ``list_2019`` JSON URL.

    Unlike the sibling crawlers this one calls ``get_html`` directly and
    discards its return value. The URL pins both the list year (2019) and the
    ``t=`` query value.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the fetcher receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the call is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    listUrl = 'http://typhoon.nmc.cn/weatherservice/typhoon/jsons/list_2019?t=1557472608958&callback=typhoon_jsons_list_2019'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        get_html(listUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("typhoon_ZH001访问网站失败", str(err))
    postgreCommand.closePostgre()
def debrisFlow_ZH002():
    """Run ``infos_paser`` on the hard-coded www.baidu.com news search URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'https://www.baidu.com/s?tn=news&rtt=1&bsst=1&cl=2&wd=%E6%B3%A5%E7%9F%B3%E6%B5%81&medium=0'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("debrisFlow_ZH002访问网站失败", str(err))
    postgreCommand.closePostgre()
def forestFire_ZH005():
    """Run ``infos_paser`` on the hard-coded www.tianqi.com tag-page URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'http://www.tianqi.com/tag/%C9%AD%C1%D6%BB%F0%D4%D6/'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("forestFire_ZH005访问网站失败", str(err))
    postgreCommand.closePostgre()
def rainstorm_ES001():
    """Run ``infos_paser_One`` on the severe.worldweather.wmo.int ``/thunder`` URL.

    This crawler uses ``infos_paser_One`` rather than ``infos_paser``.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'https://severe.worldweather.wmo.int/thunder'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser_One(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("rainstorm_ES001访问网站失败", str(err))
    postgreCommand.closePostgre()
def tsunami_ZH001():
    """Run ``infos_paser`` on the hard-coded www.oceanguide.org.cn ``boreList`` URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'http://www.oceanguide.org.cn/hyyj/map/boreList.htm?type=bore'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("tsunami_ZH001访问网站失败", str(err))
    postgreCommand.closePostgre()
def forestFire_ZH002():
    """Run ``infos_paser`` on the hard-coded www.baidu.com news search URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'https://www.baidu.com/s?ie=utf-8&cl=2&medium=0&rtt=1&bsst=1&rsv_dl=news_t_sk&tn=news&word=%E6%A3%AE%E6%9E%97%E7%81%AB%E7%81%BE&rsv_sug3=22&rsv_sug4=626&rsv_sug1=16&rsv_sug2=0&inputT=7419'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("forestFire_ZH002访问网站失败", str(err))
    postgreCommand.closePostgre()
def thunderstorm_ZH001():
    """Run ``infos_paser`` on the hard-coded www.tianqi.com tag-page URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'http://www.tianqi.com/tag/%C0%D7%B1%A9%B4%F3%B7%E7/'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("thunderstorm_ZH001访问网站失败", str(err))
    postgreCommand.closePostgre()
def stormSurge_ZH002():
    """Run ``infos_paser`` on the hard-coded www.baidu.com news search URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'https://www.baidu.com/s?tn=news&rtt=4&bsst=1&cl=2&wd=%E9%A3%8E%E6%9A%B4%E6%BD%AE&medium=0'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("stormSurge_ZH002访问网站失败", str(err))
    postgreCommand.closePostgre()
def rainstorm_ZH006():
    """Run ``infos_paser`` on the hard-coded www.tianqi.com tag-page URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'http://www.tianqi.com/tag/%B1%A9%D3%EA/'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("rainstorm_ZH006访问网站失败", str(err))
    postgreCommand.closePostgre()
def tsunami_ZH003():
    """Run ``infos_paser`` on the hard-coded so.dzwww.com title-search URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        # The query string used to read "DOCTITLE×cope=×copecolumn=": the
        # "&times" at the start of "&timescope" / "&timescopecolumn" had been
        # collapsed into a multiplication sign (an HTML-entity artefact),
        # fusing three parameters into one. The separators are restored here.
        url = 'http://so.dzwww.com/web/search?searchscope=DOCTITLE&timescope=&timescopecolumn=&orderby=LIFO&channelid=205667&andsen=&total=&orsen=&exclude=&searchword=%E6%B5%B7%E5%95%B8&perpage=&templet=&token=&timeline='
        infos_paser(url)
    except Exception as e:
        print("tsunami_ZH003访问网站失败", str(e))
    postgreCommand.closePostgre()
def stormSurge_ZH005():
    """Run ``infos_paser`` on the hard-coded search.cctv.com web-search URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'https://search.cctv.com/search.php?qtext=%E9%A3%8E%E6%9A%B4%E6%BD%AE&type=web'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("stormSurge_ZH005访问网站失败", str(err))
    postgreCommand.closePostgre()
def collapse_ZH004():
    """Run ``infos_paser`` on the hard-coded so.dzwww.com title-search URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        # The query string used to read "DOCTITLE×cope=×copecolumn=": the
        # "&times" at the start of "&timescope" / "&timescopecolumn" had been
        # collapsed into a multiplication sign (an HTML-entity artefact),
        # fusing three parameters into one. The separators are restored here.
        url = 'http://so.dzwww.com/web/search?searchscope=DOCTITLE&timescope=&timescopecolumn=&orderby=LIFO&channelid=205667&andsen=&total=&orsen=&exclude=&searchword=%E5%B1%B1%E4%BD%93%E5%B4%A9%E5%A1%8C&perpage=&templet=&token=&timeline='
        infos_paser(url)
    except Exception as e:
        # The message previously named collapse_ZH002 (copy-paste slip); it
        # now identifies this crawler so failures can be traced correctly.
        print("collapse_ZH004访问网站失败", str(e))
    postgreCommand.closePostgre()
def tsunami_ZH005():
    """Run ``infos_paser`` on the hard-coded search.cctv.com web-search URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'https://search.cctv.com/search.php?qtext=%E6%B5%B7%E5%95%B8&type=web'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("tsunami_ZH005访问网站失败", str(err))
    postgreCommand.closePostgre()
def earthquake_ES002():
    """Run ``infos_paser`` on the hard-coded ds.iris.edu seismon event-list URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'http://ds.iris.edu/seismon/eventlist/index.phtml'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("earthquake_ES002访问网站失败", str(err))
    postgreCommand.closePostgre()
def landslide_ZH005():
    """Run ``infos_paser`` on the hard-coded www.tianqi.com tag-page URL.

    Rebinds the module-level ``postgreCommand`` to a newly connected
    ``PostgreCommand`` (the parser receives only the URL, so it presumably
    picks the connection up from that global; confirm in its definition).
    Any ``Exception`` raised by the parser is printed rather than propagated,
    and the connection is closed afterwards.
    """
    global postgreCommand
    pageUrl = 'http://www.tianqi.com/tag/%C9%BD%CC%E5/'
    postgreCommand = PostgreCommand()
    postgreCommand.connectPostgre()
    try:
        infos_paser(pageUrl)
    except Exception as err:
        # Best effort: report the failure, then fall through to the close.
        print("landslide_ZH005访问网站失败", str(err))
    postgreCommand.closePostgre()