def analyzeInfo_one(item):
    """Build a rainstorm record from one www.cibeicn.com list entry and store it.

    The article link is taken from the first ``<a>`` inside ``item``. Source,
    body text and release time are obtained by calling ``get_source``,
    ``get_original`` and ``get_releaseTime`` on that link. Location and
    casualty figures are extracted from "title,body" via ``address.placeMany``
    and the ``toYc`` helpers, and the record is handed to
    ``postgreCommand.insertData``.

    Args:
        item: Parsed markup node for one list entry; it must support
            ``find_all`` and ``find`` (presumably a BeautifulSoup tag --
            confirm against the caller).

    Returns:
        None. The outcome of the database call is printed.

    Note:
        Only the database call is guarded. Parsing errors (for example an
        IndexError when the entry has no anchor or the source pattern does
        not match) propagate to the caller.
    """
    anchors = item.find_all('a')
    link = 'http://www.cibeicn.com' + anchors[0]['href']

    source = re.findall(r'来源:(.+)', get_source(link))[0]
    body = get_original(link)

    # Strip everything but digits so that parser.parse receives a compact
    # numeric timestamp. A raw string is used because "\D" in a plain string
    # literal is an invalid escape sequence.
    release_digits = re.sub(r"\D", "", get_releaseTime(link))
    release_time = parser.parse(release_digits).strftime('%Y-%m-%d %H:%M:%S')

    # Prefer the <strong> text as the title; fall back to the anchor text.
    strong_tag = item.find('strong')
    if strong_tag is None:
        title = anchors[0].get_text().strip()
    else:
        title = strong_tag.get_text().strip()

    full_text = title + ',' + body
    # placeMany result is indexed as:
    # 0 place, 1 longitude, 2 latitude, 3 province, 4 country.
    location = address.placeMany(full_text)
    death = toYc.death(full_text)
    injured = toYc.Injured(full_text)
    loss = toYc.loss(full_text)

    # Key order is kept exactly as in the original implementation.
    # NOTE(review): unlike the sibling scrapers in this file, no
    # 'isrellonandlat' key is set here -- confirm that this is intended.
    result = {
        'disasterid': '10107',            # category: rainstorm
        'link': link,                     # news link
        'source': source,                 # news source
        'originalText': body,             # news body
        'releaseTime': release_time,      # release time
        'title': title,                   # title
        'place': location[0],             # place of occurrence
        'longitude': str(location[1]),    # longitude of the place
        'latitude': str(location[2]),     # latitude of the place
        'strength': '',                   # disaster strength
        'occurTime': release_time,        # occurrence time (release time is used)
        'loss': str(loss),                # economic loss
        'injured': str(injured),          # number of injured
        'death': str(death),              # number of deaths
        'pictures': '',                   # multiple paths are separated by semicolons
        'more': '',                       # special field
        'regional': '国内',
        'province': location[3],          # first-level administrative division
        'country': location[4],           # country where the disaster occurred
        'current_website': '防灾网',       # website the record was taken from
        'isreleasetime': '1',             # whether occurTime is substituted by releaseTime
    }
    sub_record = {
        'title': result['title'],
        'originalText': result['originalText'],
        'pictures': result['pictures'],
    }

    try:
        # Third argument of insertData; presumably names the target
        # table/dataset -- confirm against postgreCommand.
        target = 'rainstorm_ZH001'
        res = postgreCommand.insertData(result, sub_record, target)
        if res == 1:
            print(target, '数据插入成功!')
        elif res == 0:
            print(target, '数据更新成功!')
    except Exception as e:
        # Best-effort boundary: report the failure and carry on.
        print("插入数据失败", str(e))
def analyzeInfo(item):
    """Build a record from one Baidu-news search result and store it.

    Link and title come from the first ``<a>`` in the first ``<h3>`` of
    ``item``. The release time is parsed from the second and third
    whitespace-separated tokens of the summary paragraph. Source, body and
    pictures come from ``get_original(link)`` (indexes 0, 1, 2). Location and
    casualty figures are extracted from "title,body", and the record is
    handed to ``postgreCommand.insertData``.

    Args:
        item: Parsed markup node for one search result; it must support
            ``find_all`` and ``find`` (presumably a BeautifulSoup tag --
            confirm against the caller).

    Returns:
        None. The outcome of the database call is printed.

    Note:
        Only the database call is guarded; parsing errors propagate.
    """
    heading = item.find_all('h3', limit=1)
    anchors = heading[0].find_all('a', limit=1)
    # The trailing space in the class value is part of the original selector.
    summary = item.find('div', attrs={'class': 'c-summary c-row '})
    tokens = summary.find('p').get_text().split()

    # Keep only digits of the date/time tokens before parsing. A raw string
    # is used because "\D" in a plain literal is an invalid escape sequence.
    digits = re.sub(r"\D", "", tokens[1] + tokens[2])
    release_time = parser.parse(digits).strftime('%Y-%m-%d %H:%M:%S')

    link = anchors[0]['href']
    title = anchors[0].get_text().strip()

    # get_original result is indexed as: 0 source, 1 body text, 2 pictures.
    detail = get_original(link)
    source = detail[0]
    body = detail[1]
    pictures = detail[2]

    full_text = title + ',' + body
    # placeMany result is indexed as:
    # 0 place, 1 longitude, 2 latitude, 3 province, 4 country.
    location = address.placeMany(full_text)
    death = toYc.death(full_text)
    injured = toYc.Injured(full_text)
    loss = toYc.loss(full_text)

    # Key order is kept exactly as in the original implementation.
    result = {
        'link': link,                     # news link
        'title': title,                   # news title
        'releaseTime': release_time,      # release time
        'disasterid': '10201',            # disaster type
        'source': source,                 # news source
        'originalText': body,             # news body
        'pictures': pictures,             # news pictures
        'place': location[0],             # place of occurrence
        'longitude': str(location[1]),    # longitude of the place
        'latitude': str(location[2]),     # latitude of the place
        'loss': str(loss),                # economic loss
        'injured': str(injured),          # number of injured
        'death': str(death),              # number of deaths
        'province': location[3],          # first-level administrative division
        'country': location[4],           # country where the disaster occurred
        'strength': '',
        'occurTime': release_time,        # release time is used as occurrence time
        'more': '',                       # special field
        'regional': '国内',                # region of the news release
        'current_website': '百度新闻',      # website the record was taken from
        'isreleasetime': '1',             # whether occurTime is substituted by releaseTime
        'isrellonandlat': '0',
    }
    sub_record = {
        'title': result['title'],
        'originalText': result['originalText'],
        'pictures': result['pictures'],
    }

    try:
        # Third argument of insertData; presumably names the target
        # table/dataset -- confirm against postgreCommand.
        target = 'stormSurge_ZH002'
        res = postgreCommand.insertData(result, sub_record, target)
        if res == 1:
            print(target, '数据插入成功!')
        elif res == 0:
            print(target, '数据更新成功!')
    except Exception as e:
        # Best-effort boundary: report the failure and carry on.
        print("插入数据失败", str(e))
def analyzeInfo(item):
    """Build a record from one CCTV search result and store it.

    The title is the text of the first ``<a>`` in ``item``. The link is the
    ``lanmu1`` attribute of the first ``<span>`` inside ``<h3 class="tit">``.
    The release time is parsed from ``<span class="tim">`` inside
    ``<div class="src-tim">``. Body, source and pictures come from
    ``analyzeInfoSun(link)`` (indexes 0, 1, 2). Location and casualty figures
    are extracted from "title,body", and the record is handed to
    ``postgreCommand.insertData``.

    Args:
        item: Parsed markup node for one search result; it must support
            ``find_all`` (presumably a BeautifulSoup tag -- confirm against
            the caller).

    Returns:
        None. The outcome of the database call is printed.

    Note:
        Only the database call is guarded; parsing errors propagate.
    """
    anchors = item.find_all('a', limit=1)
    heading = item.find_all('h3', attrs={'class': 'tit'}, limit=1)
    link_spans = heading[0].find_all('span', limit=1)
    meta = item.find_all('div', attrs={'class': 'src-tim'}, limit=1)
    time_spans = meta[0].find_all('span', attrs={'class': 'tim'}, limit=1)

    # Keep only digits of the time text before parsing. A raw string is used
    # because '\D' in a plain literal is an invalid escape sequence.
    digits = re.sub(r'\D', "", time_spans[0].get_text().strip())
    release_time = parser.parse(digits).strftime('%Y-%m-%d %H:%M:%S')

    link = link_spans[0]['lanmu1']
    title = anchors[0].get_text().strip()

    # analyzeInfoSun result is indexed as: 0 body text, 1 source, 2 pictures.
    detail = analyzeInfoSun(link)
    source = detail[1]
    body = detail[0]

    full_text = title + ',' + body
    # placeMany result is indexed as:
    # 0 place, 1 longitude, 2 latitude, 3 province, 4 country.
    location = address.placeMany(full_text)
    death = toYc.death(full_text)
    injured = toYc.Injured(full_text)
    loss = toYc.loss(full_text)

    # Key order is kept exactly as in the original implementation.
    result = {
        'disasterid': '10201',            # news category
        'link': link,                     # news link
        'title': title,                   # news title
        'releaseTime': release_time,      # release time
        'source': source,                 # news source
        'originalText': body,             # news body
        'place': location[0],             # place of occurrence
        'longitude': str(location[1]),    # longitude of the place
        'latitude': str(location[2]),     # latitude of the place
        'strength': '',                   # disaster strength
        'occurTime': release_time,        # occurrence time (release time is used)
        'loss': str(loss),                # economic loss
        'injured': str(injured),          # number of injured
        'death': str(death),              # number of deaths
        'pictures': detail[2],            # multiple paths are separated by semicolons
        'more': '',                       # special field
        'regional': '国内',
        'province': location[3],          # first-level administrative division
        'country': location[4],           # country where the disaster occurred
        'current_website': '央视网',       # website the record was taken from
        'isreleasetime': '1',             # whether occurTime is substituted by releaseTime
        'isrellonandlat': '0',
    }
    sub_record = {
        'title': result['title'],
        'originalText': result['originalText'],
        'pictures': result['pictures'],
    }

    try:
        # Third argument of insertData; presumably names the target
        # table/dataset -- confirm against postgreCommand.
        target = 'stormSurge_ZH005'
        res = postgreCommand.insertData(result, sub_record, target)
        if res == 1:
            print(target, '数据插入成功!')
        elif res == 0:
            print(target, '数据更新成功!')
    except Exception as e:
        # Best-effort boundary: report the failure and carry on.
        print("插入数据失败", str(e))
def analyzeInfo_One(item):
    """Build a rainstorm record from one www.qxkp.net list entry and store it.

    Title and link come from the ``<a>`` in the first ``<div>`` of ``item``;
    the release time is parsed from the text of the second ``<div>``. Source,
    body and pictures are read from the dict returned by
    ``analyzeInfo_Two(link)``. Location and casualty figures are extracted
    from "title,body", and the record is handed to
    ``postgreCommand.insertData``.

    Args:
        item: Parsed markup node for one list entry; it must support
            ``find_all`` (presumably a BeautifulSoup tag -- confirm against
            the caller).

    Returns:
        None. The outcome of the database call is printed.

    Note:
        Only the database call is guarded; parsing errors propagate.
    """
    divs = item.find_all('div')
    anchor = divs[0].find('a')
    title = anchor.get_text().strip()
    link = 'http://www.qxkp.net' + anchor['href']

    # Keep only digits of the date text before parsing. A raw string is used
    # because "\D" in a plain literal is an invalid escape sequence.
    digits = re.sub(r"\D", "", divs[1].get_text())
    release_time = parser.parse(digits).strftime('%Y-%m-%d %H:%M:%S')

    # Detail-page data; kept under its own name so it is not confused with
    # the sub-record passed to insertData below.
    detail = analyzeInfo_Two(link)
    source = detail['source']
    body = detail['originalText']

    full_text = title + ',' + body
    # placeMany result is indexed as:
    # 0 place, 1 longitude, 2 latitude, 3 province, 4 country.
    location = address.placeMany(full_text)
    death = toYc.death(full_text)
    injured = toYc.Injured(full_text)
    loss = toYc.loss(full_text)

    # Key order is kept exactly as in the original implementation.
    result = {
        'disasterid': '10107',            # category: rainstorm
        'link': link,                     # news link
        'source': source,                 # news source
        'originalText': body,             # news body
        'releaseTime': release_time,      # release time
        'title': title,                   # title
        'place': location[0],             # place of occurrence
        'longitude': str(location[1]),    # longitude of the place
        'latitude': str(location[2]),     # latitude of the place
        'strength': '',                   # disaster strength
        'occurTime': release_time,        # occurrence time (release time is used)
        'loss': str(loss),                # economic loss
        'injured': str(injured),          # number of injured
        'death': str(death),              # number of deaths
        'pictures': detail['pictures'],   # multiple paths are separated by semicolons
        'more': '',                       # special field
        'regional': '国内',
        'province': location[3],          # first-level administrative division
        'country': location[4],           # country where the disaster occurred
        'current_website': '气象科普网',    # website the record was taken from
        'isreleasetime': '1',             # whether occurTime is substituted by releaseTime
        'isrellonandlat': '0',
    }
    sub_record = {
        'title': result['title'],
        'originalText': result['originalText'],
        'pictures': result['pictures'],
    }

    try:
        # Third argument of insertData; presumably names the target
        # table/dataset -- confirm against postgreCommand.
        target = 'rainstorm_ZH002'
        res = postgreCommand.insertData(result, sub_record, target)
        if res == 1:
            print(target, '数据插入成功!')
        elif res == 0:
            print(target, '数据更新成功!')
    except Exception as e:
        # Best-effort boundary: report the failure and carry on.
        print("插入数据失败", str(e))
def analyzeInfo(item):
    """Build a rainstorm record from one weather-site list entry and store it.

    Link and title come from the first ``<a>`` in the first ``<h4>`` of
    ``item``. The release time is parsed from the text of the first ``<i>``
    after its first three characters are dropped. Body, source and pictures
    come from ``get_original(link)`` (indexes 0, 1, 2). Location and casualty
    figures are extracted from "title,body", and the record is handed to
    ``postgreCommand.insertData``.

    Args:
        item: Parsed markup node for one list entry; it must support
            ``find_all`` (presumably a BeautifulSoup tag -- confirm against
            the caller).

    Returns:
        None. The outcome of the database call is printed.
    """
    first_h4 = item.find_all('h4', limit=1)[0]
    anchors = first_h4.find_all('a', limit=1)
    stamp_nodes = item.find_all('i', limit=1)

    # The first three characters of the stamp text are skipped before parsing.
    stamp_text = stamp_nodes[0].get_text().strip()
    stamp = parser.parse(stamp_text[3:])
    published = stamp.strftime('%Y-%m-%d %H:%M:%S')

    url = a_href = anchors[0]['href']
    headline = anchors[0].get_text().strip()

    # get_original result is indexed as: 0 body text, 1 source, 2 pictures.
    page = get_original(a_href)
    origin = page[1]
    article = page[0]

    combined = headline + ',' + article
    # placeMany result is indexed as:
    # 0 place, 1 longitude, 2 latitude, 3 province, 4 country.
    geo = address.placeMany(combined)
    dead = toYc.death(combined)
    hurt = toYc.Injured(combined)
    damage = toYc.loss(combined)

    # Keys are inserted in the same order as the original implementation.
    result = {
        'link': url,                      # news link
        'title': headline,                # news title
        'releaseTime': published,         # release time
        'source': origin,                 # news source
        'originalText': article,          # news body
        'disasterid': '10107',            # disaster type
        'place': geo[0],                  # place of occurrence
        'longitude': str(geo[1]),         # longitude of the place
        'latitude': str(geo[2]),          # latitude of the place
        'strength': '',
        'occurTime': published,
        'loss': str(damage),              # economic loss
        'injured': str(hurt),             # number of injured
        'death': str(dead),               # number of deaths
        'pictures': page[2],              # multiple paths are separated by semicolons
        'more': '',                       # special field
        'regional': '国内',                # region of the news release
        'province': geo[3],               # first-level administrative division
        'country': geo[4],                # country where the disaster occurred
        'current_website': '天气网',       # website the record was taken from
        'isreleasetime': '1',             # whether occurTime is substituted by releaseTime
        'isrellonandlat': '0',
    }
    resultSun = {key: result[key] for key in ('title', 'originalText', 'pictures')}

    try:
        title = 'rainstorm_ZH006'
        res = postgreCommand.insertData(result, resultSun, title)
        if res == 1:
            print(title, '数据插入成功!')
        elif res == 0:
            print(title, '数据更新成功!')
    except Exception as e:
        # Report the failure and carry on.
        print("插入数据失败", str(e))
def analyzeInfo(item):
    """Build a record from one Sina search result and store it.

    Link and title come from the first ``<a>`` in the first ``<h2>`` of
    ``item``. The release time is the second and third whitespace-separated
    tokens of the first ``<span>`` in that heading, joined by a space.
    ``get_original(link)`` supplies source, body and pictures (indexes 0, 1,
    2) and a flag at index 3. The record is built and stored only when that
    flag is truthy.

    Args:
        item: Parsed markup node for one search result; it must support
            ``find_all`` (presumably a BeautifulSoup tag -- confirm against
            the caller).

    Returns:
        None. The outcome of the database call is printed.
    """
    first_h2 = item.find_all('h2', limit=1)[0]
    anchors = first_h2.find_all('a')
    spans = first_h2.find_all('span')
    tokens = spans[0].get_text().strip().split()

    url = anchors[0]['href']
    headline = anchors[0].get_text().strip()
    published = tokens[1] + ' ' + tokens[2]

    page = get_original(url)
    # NOTE(review): the original indentation was lost; the flag check is
    # taken to cover the whole remainder of the function, since the code
    # below needs the keys that are only set when the flag is truthy.
    if not page[3]:
        return

    origin = page[0]
    article = page[1]
    images = page[2]

    combined = headline + ',' + article
    # placeMany result is indexed as:
    # 0 place, 1 longitude, 2 latitude, 3 province, 4 country.
    geo = address.placeMany(combined)
    dead = toYc.death(combined)
    hurt = toYc.Injured(combined)
    damage = toYc.loss(combined)

    # Keys are inserted in the same order as the original implementation.
    result = {
        'link': url,                      # news link
        'title': headline,                # news title
        'releaseTime': published,         # release time
        'source': origin,                 # news source
        'originalText': article,          # news body
        'pictures': images,               # news pictures
        'disasterid': '10201',            # disaster type
        'place': geo[0],                  # place of occurrence
        'longitude': str(geo[1]),         # longitude of the place
        'latitude': str(geo[2]),          # latitude of the place
        'loss': str(damage),              # economic loss
        'injured': str(hurt),             # number of injured
        'death': str(dead),               # number of deaths
        'province': geo[3],               # first-level administrative division
        'country': geo[4],                # country where the disaster occurred
        'strength': '',
        'occurTime': published,
        'more': '',                       # special field
        'regional': '国内',                # region of the news release
        'current_website': '新浪网',       # website the record was taken from
        'isreleasetime': '1',             # whether occurTime is substituted by releaseTime
        'isrellonandlat': '0',
    }
    resultSun = {key: result[key] for key in ('title', 'originalText', 'pictures')}

    try:
        title = 'stormSurge_ZH004'
        res = postgreCommand.insertData(result, resultSun, title)
        if res == 1:
            print(title, '数据插入成功!')
        elif res == 0:
            print(title, '数据更新成功!')
    except Exception as e:
        # Report the failure and carry on.
        print("插入数据失败", str(e))
def analyzeInfo(item):
    """Build a storm-surge record from one www.oceanguide.org.cn entry and store it.

    Link and title come from the first ``<a>`` in ``item``; the release time
    is parsed from the text of the first ``<p>``. Body and source are
    obtained from ``get_original(link)`` and ``get_source(link)``. The
    location is extracted from "body,title" (body first). Casualty and loss
    fields are fixed at '0', and the record is handed to
    ``postgreCommand.insertData``.

    Args:
        item: Parsed markup node for one list entry; it must support
            ``find_all`` and ``find`` (presumably a BeautifulSoup tag --
            confirm against the caller).

    Returns:
        None. The outcome of the database call is printed.

    Note:
        Only the database call is guarded; parsing errors propagate.
    """
    anchors = item.find_all('a')
    link = 'http://www.oceanguide.org.cn' + anchors[0]['href']
    title = anchors[0].get_text().strip()

    # Keep only digits of the date text before parsing. A raw string is used
    # because "\D" in a plain literal is an invalid escape sequence.
    digits = re.sub(r"\D", "", item.find('p').get_text().strip())
    release_time = parser.parse(digits).strftime('%Y-%m-%d %H:%M:%S')

    body = get_original(link)
    source = get_source(link)

    # Body precedes title here, unlike the sibling scrapers.
    full_text = body + ',' + title
    # placeMany result is indexed as:
    # 0 place, 1 longitude, 2 latitude, 3 province, 4 country.
    location = address.placeMany(full_text)

    # Key order is kept exactly as in the original implementation.
    # NOTE(review): occurTime is copied from releaseTime, yet 'isreleasetime'
    # is '0' while the sibling scrapers use '1' -- confirm the flag value.
    result = {
        'disasterid': '10201',            # news category: storm surge
        'link': link,                     # news link
        'title': title,                   # news title
        'releaseTime': release_time,      # release time
        'originalText': body,             # news body
        'source': source,                 # news source
        'place': location[0],             # place of occurrence
        'longitude': str(location[1]),    # longitude of the place
        'latitude': str(location[2]),     # latitude of the place
        'strength': '',                   # disaster strength
        'occurTime': release_time,        # occurrence time
        'injured': '0',                   # number of injured
        'death': '0',                     # number of deaths
        'loss': '0',                      # economic loss
        'pictures': '',                   # multiple paths are separated by semicolons
        'more': '',                       # special field
        'regional': '国内',
        'province': location[3],          # first-level administrative division
        'country': location[4],           # country where the disaster occurred
        'current_website': '中国海洋预报网',  # website the record was taken from
        'isreleasetime': '0',             # whether occurTime is substituted by releaseTime
        'isrellonandlat': '0',
    }
    sub_record = {
        'title': result['title'],
        'originalText': result['originalText'],
        'pictures': result['pictures'],
    }

    try:
        # Third argument of insertData; presumably names the target
        # table/dataset -- confirm against postgreCommand.
        target = 'stormSurge_ZH001'
        res = postgreCommand.insertData(result, sub_record, target)
        if res == 1:
            print(target, '数据插入成功!')
        elif res == 0:
            print(target, '数据更新成功!')
    except Exception as e:
        # Best-effort boundary: report the failure and carry on.
        print("插入数据失败", str(e))