Ejemplo n.º 1
0
def analyzeInfo_one(item):
    """Build a record from one cibeicn.com list entry and hand it to the DB layer.

    The link is taken from the entry's first ``<a>`` element; source, article
    text and release time are fetched through ``get_source``, ``get_original``
    and ``get_releaseTime``. Location fields come from ``address.placeMany``
    and casualty/loss figures from ``toYc``. The finished record is passed to
    ``postgreCommand.insertData`` under the title ``'rainstorm_ZH001'``.

    Args:
        item: Parsed HTML node of the list entry. It must provide ``find_all``
            and ``find`` (presumably a BeautifulSoup tag -- confirm with the
            caller).

    Returns:
        None. The outcome of the insert is reported with ``print``.
    """
    result = {}
    a_title = item.find_all('a')
    result['disasterid'] = '10107'  # category: rainstorm
    result['link'] = 'http://www.cibeicn.com' + a_title[0]['href']  # news link
    source = get_source(result['link'])
    # The "来源:" label may be missing from the fetched text; use an empty
    # source instead of failing with IndexError on an empty match list.
    source_match = re.search(r'来源:(.+)', source)
    result['source'] = source_match.group(1) if source_match else ''  # news source
    result['originalText'] = get_original(result['link'])  # original article text
    release = get_releaseTime(result['link'])
    # Drop every non-digit so only the timestamp digits reach the parser.
    # Raw string: \D is not a valid escape sequence in a normal literal.
    time_str1 = re.sub(r"\D", "", release)
    datetime_struct1 = parser.parse(time_str1)
    result['releaseTime'] = datetime_struct1.strftime('%Y-%m-%d %H:%M:%S')  # release time
    strong_info_list = item.find('strong')
    if strong_info_list is None:
        # No <strong> element: use the text of the first link as the title.
        result['title'] = a_title[0].get_text().strip()
    else:
        result['title'] = strong_info_list.get_text().strip()
    originalText = result['title'] + ',' + result['originalText']
    # Indices 0..4 of the returned sequence are used below as
    # place, longitude, latitude, province and country.
    latlngadd_tuple = address.placeMany(originalText)
    result['place'] = latlngadd_tuple[0]  # place of occurrence
    result['longitude'] = str(latlngadd_tuple[1])  # longitude of the place
    result['latitude'] = str(latlngadd_tuple[2])  # latitude of the place
    result['strength'] = ''  # disaster strength
    result['occurTime'] = result['releaseTime']  # occurrence time
    death = toYc.death(originalText)
    injured = toYc.Injured(originalText)
    lossNumber = toYc.loss(originalText)
    result['loss'] = str(lossNumber)  # economic loss
    result['injured'] = str(injured)  # number of injured
    result['death'] = str(death)  # number of deaths
    result['pictures'] = ''  # multiple paths are separated by semicolons
    result['more'] = ''  # special field
    result['regional'] = '国内'
    result['province'] = latlngadd_tuple[3]  # first-level administrative division
    result['country'] = latlngadd_tuple[4]  # country of the disaster
    result['current_website'] = '防灾网'  # website the record comes from
    result['isreleasetime'] = '1'  # whether the release time stands in for the occurrence time
    # NOTE(review): no 'isrellonandlat' key is set here -- confirm whether
    # insertData expects one for this title.

    resultSun = {}
    resultSun['title'] = result['title']
    resultSun['originalText'] = result['originalText']
    resultSun['pictures'] = result['pictures']

    try:
        title = 'rainstorm_ZH001'
        res = postgreCommand.insertData(result, resultSun, title)
        if res == 1:
            print(title, '数据插入成功!')
        elif res == 0:
            print(title, '数据更新成功!')
    except Exception as e:
        # Best effort: a failed insert is reported and must not stop the caller.
        print("插入数据失败", str(e))
Ejemplo n.º 2
0
def analyzeInfo(item):
    """Build a record from one list entry and hand it to the DB layer.

    Title, link and release time are read from the entry itself; source,
    article text and pictures come from ``analyzeInfoSun``. Location fields
    come from ``address.placeSingle`` and casualty/loss figures from ``toYc``.
    The finished record is passed to ``postgreCommand.insertData`` under the
    title ``'landslide_ZH006'``.

    Args:
        item: Parsed HTML node of the list entry. It must provide ``find_all``
            (presumably a BeautifulSoup tag -- confirm with the caller).

    Returns:
        None. The outcome of the insert is reported with ``print``.
    """
    result = {}
    a_list = item.find_all('a', limit=1)
    h3_list = item.find_all('h3', attrs={'class': 'tit'}, limit=1)
    span_link_list = h3_list[0].find_all('span', limit=1)
    div_list = item.find_all('div', attrs={'class': 'src-tim'}, limit=1)
    span2_list = div_list[0].find_all('span', attrs={'class': 'tim'}, limit=1)
    # Drop every non-digit so only the timestamp digits reach the parser.
    # Raw string: \D is not a valid escape sequence in a normal literal.
    time_str = re.sub(r'\D', "", span2_list[0].get_text().strip())
    datetime_struct = parser.parse(time_str)
    releaseTime = datetime_struct.strftime('%Y-%m-%d %H:%M:%S')
    result['disasterid'] = '10002'  # news category
    # The article URL is stored in the span's 'lanmu1' attribute.
    result['link'] = span_link_list[0]['lanmu1']  # news link
    result['title'] = a_list[0].get_text().strip()  # news title
    result['releaseTime'] = releaseTime  # release time
    print(result['link'])
    # Index 0 is used as article text, 1 as source, 2 as pictures.
    analyze = analyzeInfoSun(result['link'])
    result['source'] = analyze[1]  # news source
    result['originalText'] = analyze[0]  # original article text
    originalText = result['title'] + ',' + result['originalText']
    # Indices 0..4 of the returned sequence are used below as
    # place, longitude, latitude, province and country.
    latlngadd_tuple = address.placeSingle(originalText)
    result['place'] = latlngadd_tuple[0]  # place of occurrence
    result['longitude'] = str(latlngadd_tuple[1])  # longitude of the place
    result['latitude'] = str(latlngadd_tuple[2])  # latitude of the place
    result['strength'] = ''  # disaster strength
    result['occurTime'] = result['releaseTime']  # occurrence time
    death = toYc.death(originalText)
    injured = toYc.Injured(originalText)
    lossNumber = toYc.loss(originalText)
    result['loss'] = str(lossNumber)  # economic loss
    result['injured'] = str(injured)  # number of injured
    result['death'] = str(death)  # number of deaths
    result['pictures'] = analyze[2]  # multiple paths are separated by semicolons
    result['more'] = ''  # special field
    result['regional'] = '国内'
    result['province'] = latlngadd_tuple[3]  # first-level administrative division
    result['country'] = latlngadd_tuple[4]  # country of the disaster
    result['current_website'] = '央视网'  # website the record comes from
    result['isreleasetime'] = '1'  # whether the release time stands in for the occurrence time
    result['isrellonandlat'] = '0'
    resultSun = {}
    resultSun['title'] = result['title']
    resultSun['originalText'] = result['originalText']
    resultSun['pictures'] = result['pictures']
    try:
        title = 'landslide_ZH006'
        res = postgreCommand.insertData(result, resultSun, title)
        if res == 1:
            print(title, '数据插入成功!')
        elif res == 0:
            print(title, '数据更新成功!')
    except Exception as e:
        # Best effort: a failed insert is reported and must not stop the caller.
        print("插入数据失败", str(e))
Ejemplo n.º 3
0
def analyzeInfo(item):
    """Build a record from one search-result entry and hand it to the DB layer.

    Link and title are read from the ``<a>`` inside the entry's ``<h3>``; the
    release time is read from the first ``<p>`` of the summary ``<div>``.
    Source, article text and pictures come from ``get_original``. Location
    fields come from ``address.placeSingle`` and casualty/loss figures from
    ``toYc``. The finished record is passed to ``postgreCommand.insertData``
    under the title ``'collapse_ZH003'``.

    Args:
        item: Parsed HTML node of the entry. It must provide ``find_all`` and
            ``find`` (presumably a BeautifulSoup tag -- confirm with the
            caller).

    Returns:
        None. The outcome of the insert is reported with ``print``.
    """
    result = {}
    h3_list = item.find_all('h3', limit=1)
    a_list = h3_list[0].find_all('a', limit=1)
    div_list = item.find('div', attrs={'class': 'c-summary c-row '})
    p_list = div_list.find('p').get_text().split()
    # The second and third whitespace-separated tokens are combined and reduced
    # to digits before parsing. Raw string: \D is not a valid escape sequence
    # in a normal literal.
    time_str = re.sub(r"\D", "", p_list[1] + p_list[2])
    datetime_struct1 = parser.parse(time_str)
    releaseTime = datetime_struct1.strftime('%Y-%m-%d %H:%M:%S')
    result['link'] = a_list[0]['href']  # news link
    result['title'] = a_list[0].get_text().strip()  # news title
    result['releaseTime'] = releaseTime  # release time
    result['disasterid'] = '1000104'
    # Index 0 is used as source, 1 as article text, 2 as pictures.
    originalList = get_original(result['link'])
    result['source'] = originalList[0]  # news source
    result['originalText'] = originalList[1]  # original article text
    result['pictures'] = originalList[2]  # multiple paths are separated by semicolons
    originalText = result['title'] + ',' + result['originalText']
    # Indices 0..4 of the returned sequence are used below as
    # place, longitude, latitude, province and country.
    latlngadd_tuple = address.placeSingle(originalText)
    result['place'] = latlngadd_tuple[0]  # place of occurrence
    result['longitude'] = str(latlngadd_tuple[1])  # longitude of the place
    result['latitude'] = str(latlngadd_tuple[2])  # latitude of the place
    death = toYc.death(originalText)
    injured = toYc.Injured(originalText)
    lossNumber = toYc.loss(originalText)
    result['loss'] = str(lossNumber)  # economic loss
    result['injured'] = str(injured)  # number of injured
    result['death'] = str(death)  # number of deaths
    result['province'] = latlngadd_tuple[3]  # first-level administrative division
    result['country'] = latlngadd_tuple[4]  # country of the disaster
    result['strength'] = ''  # disaster strength
    result['occurTime'] = result['releaseTime']  # occurrence time
    result['more'] = ''  # special field
    result['regional'] = '国内'  # region where the news was published
    result['current_website'] = '百度新闻'  # website the record comes from
    result['isreleasetime'] = '1'  # whether the release time stands in for the occurrence time
    result['isrellonandlat'] = '0'
    resultSun = {}
    resultSun['title'] = result['title']
    resultSun['originalText'] = result['originalText']
    resultSun['pictures'] = result['pictures']
    try:
        title = 'collapse_ZH003'
        res = postgreCommand.insertData(result, resultSun, title)
        if res == 1:
            print(title, '数据插入成功!')
        elif res == 0:
            print(title, '数据更新成功!')
    except Exception as e:
        # Best effort: a failed insert is reported and must not stop the caller.
        print("插入数据失败", str(e))
Ejemplo n.º 4
0
def analyzeInfo_One(item):
    """Build a record from one qxkp.net list entry and hand it to the DB layer.

    Title and link are read from the ``<a>`` in the entry's first ``<div>``,
    the release time from its second ``<div>``. Source, article text and
    pictures come from ``analyzeInfo_Two``. Location fields come from
    ``address.placeMany`` and casualty/loss figures from ``toYc``. The
    finished record is passed to ``postgreCommand.insertData`` under the title
    ``'rainstorm_ZH002'``.

    Args:
        item: Parsed HTML node of the list entry. It must provide ``find_all``
            (presumably a BeautifulSoup tag -- confirm with the caller).

    Returns:
        None. The outcome of the insert is reported with ``print``.
    """
    result = {}
    divs = item.find_all('div')
    anchor = divs[0].find('a')  # looked up once, used for both title and link
    title = anchor.get_text().strip()
    link = 'http://www.qxkp.net' + anchor['href']
    # Drop every non-digit so only the timestamp digits reach the parser.
    # Raw string: \D is not a valid escape sequence in a normal literal.
    time_str1 = re.sub(r"\D", "", divs[1].get_text())
    datetime_struct1 = parser.parse(time_str1)
    releaseTime = datetime_struct1.strftime('%Y-%m-%d %H:%M:%S')
    result['disasterid'] = '10107'  # category: rainstorm
    result['link'] = link  # news link
    # Mapping with the keys 'source', 'originalText' and 'pictures'.
    detail = analyzeInfo_Two(link)
    result['source'] = detail['source']  # news source
    result['originalText'] = detail['originalText']  # original article text
    result['releaseTime'] = releaseTime  # release time
    result['title'] = title  # title
    originalText = result['title'] + ',' + result['originalText']
    # Indices 0..4 of the returned sequence are used below as
    # place, longitude, latitude, province and country.
    latlngadd_tuple = address.placeMany(originalText)
    result['place'] = latlngadd_tuple[0]  # place of occurrence
    result['longitude'] = str(latlngadd_tuple[1])  # longitude of the place
    result['latitude'] = str(latlngadd_tuple[2])  # latitude of the place
    result['strength'] = ''  # disaster strength
    result['occurTime'] = result['releaseTime']  # occurrence time
    death = toYc.death(originalText)
    injured = toYc.Injured(originalText)
    lossNumber = toYc.loss(originalText)
    result['loss'] = str(lossNumber)  # economic loss
    result['injured'] = str(injured)  # number of injured
    result['death'] = str(death)  # number of deaths
    result['pictures'] = detail['pictures']  # multiple paths are separated by semicolons
    result['more'] = ''  # special field
    result['regional'] = '国内'
    result['province'] = latlngadd_tuple[3]  # first-level administrative division
    result['country'] = latlngadd_tuple[4]  # country of the disaster
    result['current_website'] = '气象科普网'  # website the record comes from
    result['isreleasetime'] = '1'  # whether the release time stands in for the occurrence time
    result['isrellonandlat'] = '0'
    resultSun = {}
    resultSun['title'] = result['title']
    resultSun['originalText'] = result['originalText']
    resultSun['pictures'] = result['pictures']

    try:
        table_title = 'rainstorm_ZH002'
        res = postgreCommand.insertData(result, resultSun, table_title)
        if res == 1:
            print(table_title, '数据插入成功!')
        elif res == 0:
            print(table_title, '数据更新成功!')
    except Exception as e:
        # Best effort: a failed insert is reported and must not stop the caller.
        print("插入数据失败", str(e))
Ejemplo n.º 5
0
def analyzeInfo(item):
    """Build a record from one list entry and hand it to the DB layer.

    Link, title and release time are read from the entry's ``<h2>`` element.
    Nothing is stored unless index 3 of the ``get_original`` result is truthy.
    Otherwise the record is completed with ``address.placeSingle`` and ``toYc``
    output and passed to ``postgreCommand.insertData`` under the title
    ``'collapse_ZH005'``.

    Args:
        item: Parsed HTML node of the list entry. It must provide ``find_all``
            (presumably a BeautifulSoup tag -- confirm with the caller).

    Returns:
        None. The outcome of the insert is reported with ``print``.
    """
    heading = item.find_all('h2', limit=1)[0]
    anchors = heading.find_all('a')
    spans = heading.find_all('span')
    stamp_parts = spans[0].get_text().strip().split()
    anchor = anchors[0]

    record = {
        'link': anchor['href'],
        'title': anchor.get_text().strip(),
        # Second and third whitespace-separated tokens of the span text.
        'releaseTime': f"{stamp_parts[1]} {stamp_parts[2]}",
    }

    fetched = get_original(record['link'])
    if not fetched[3]:
        # The flag at index 3 is falsy: skip this entry.
        return

    record.update({
        'source': fetched[0],
        'originalText': fetched[1],
        'pictures': fetched[2],
        'disasterid': '1000104',
    })

    full_text = record['title'] + ',' + record['originalText']
    # Indices 0..4 of the returned sequence are used below as
    # place, longitude, latitude, province and country.
    located = address.placeSingle(full_text)
    record.update({
        'place': located[0],  # place of occurrence
        'longitude': str(located[1]),  # longitude of the place
        'latitude': str(located[2]),  # latitude of the place
    })

    deaths = toYc.death(full_text)
    wounded = toYc.Injured(full_text)
    damage = toYc.loss(full_text)
    record.update({
        'loss': str(damage),  # economic loss
        'injured': str(wounded),  # number of injured
        'death': str(deaths),  # number of deaths
        'province': located[3],  # first-level administrative division
        'country': located[4],  # country of the disaster
        'strength': '',
        'occurTime': record['releaseTime'],  # release time reused as occurrence time
        'more': '',  # special field
        'regional': '国内',  # region where the news was published
        'current_website': '新浪网',  # website the record comes from
        'isreleasetime': '1',  # whether the release time stands in for the occurrence time
        'isrellonandlat': '0',
    })

    summary = {key: record[key] for key in ('title', 'originalText', 'pictures')}

    table = 'collapse_ZH005'
    try:
        status = postgreCommand.insertData(record, summary, table)
        if status == 1:
            print(table, '数据插入成功!')
        elif status == 0:
            print(table, '数据更新成功!')
    except Exception as err:
        print("插入数据失败", str(err))
Ejemplo n.º 6
0
def analyzeInfo(item):
    """Build a record from one list entry and hand it to the DB layer.

    Link and title are read from the ``<a>`` inside the entry's ``<h4>``; the
    release time is the text of the first ``<i>`` element with its first three
    characters removed. Article text, source and pictures come from
    ``get_original``. Location fields come from ``address.placeMany`` and
    casualty/loss figures from ``toYc``. The finished record is passed to
    ``postgreCommand.insertData`` under the title ``'rainstorm_ZH006'``.

    Args:
        item: Parsed HTML node of the list entry. It must provide ``find_all``
            (presumably a BeautifulSoup tag -- confirm with the caller).

    Returns:
        None. The outcome of the insert is reported with ``print``.
    """
    heading = item.find_all('h4', limit=1)[0]
    anchors = heading.find_all('a', limit=1)
    italics = item.find_all('i', limit=1)
    # Skip the three-character prefix in front of the timestamp.
    stamp_text = italics[0].get_text().strip()[3:]
    released = parser.parse(stamp_text).strftime('%Y-%m-%d %H:%M:%S')

    anchor = anchors[0]
    record = {
        'link': anchor['href'],  # news link
        'title': anchor.get_text().strip(),  # news title
        'releaseTime': released,  # release time
    }

    # Index 0 is used as article text, 1 as source, 2 as pictures.
    fetched = get_original(record['link'])
    record['source'] = fetched[1]  # news source
    record['originalText'] = fetched[0]  # original article text

    full_text = record['title'] + ',' + record['originalText']
    # Indices 0..4 of the returned sequence are used below as
    # place, longitude, latitude, province and country.
    located = address.placeMany(full_text)
    record.update({
        'disasterid': '10107',  # disaster type
        'place': located[0],  # place of occurrence
        'longitude': str(located[1]),  # longitude of the place
        'latitude': str(located[2]),  # latitude of the place
        'strength': '',
        'occurTime': record['releaseTime'],
    })

    deaths = toYc.death(full_text)
    wounded = toYc.Injured(full_text)
    damage = toYc.loss(full_text)
    record.update({
        'loss': str(damage),  # economic loss
        'injured': str(wounded),  # number of injured
        'death': str(deaths),  # number of deaths
        'pictures': fetched[2],  # multiple paths are separated by semicolons
        'more': '',  # special field
        'regional': '国内',  # region where the news was published
        'province': located[3],  # first-level administrative division
        'country': located[4],  # country of the disaster
        'current_website': '天气网',  # website the record comes from
        'isreleasetime': '1',  # whether the release time stands in for the occurrence time
        'isrellonandlat': '0',
    })

    summary = {key: record[key] for key in ('title', 'originalText', 'pictures')}

    table = 'rainstorm_ZH006'
    try:
        status = postgreCommand.insertData(record, summary, table)
        if status == 1:
            print(table, '数据插入成功!')
        elif status == 0:
            print(table, '数据更新成功!')
    except Exception as err:
        print("插入数据失败", str(err))
Ejemplo n.º 7
0
def analyzeInfo(item):
    """Build a record from one japan.people.com.cn entry and hand it to the DB layer.

    Link and title are read from the entry's first ``<a>``, the release time
    from its first ``<span>``. Article text, source and pictures come from
    ``analyzeInfoSun``. The disaster id is whatever ``disasterNB`` returns for
    the combined title and text; when it equals ``'0'`` nothing is stored.
    Otherwise the record is completed with ``placeSingle`` and ``toYc`` output
    and passed to ``postgreCommand.insertData`` under the title
    ``'comprehensive_ZH001'``.

    Args:
        item: Parsed HTML node of the list entry. It must provide ``find``
            (presumably a BeautifulSoup tag -- confirm with the caller).

    Returns:
        None. The outcome of the insert is reported with ``print``.
    """
    anchor = item.find('a')
    record = {
        'link': 'http://japan.people.com.cn' + anchor['href'],
        'title': anchor.get_text().strip(),
    }
    stamp = parser.parse(item.find('span').get_text().strip())
    record['releaseTime'] = stamp.strftime('%Y-%m-%d %H:%M:%S')

    # Index 0 is used as article text, 1 as source, 2 as pictures.
    fetched = analyzeInfoSun(record['link'])
    record['source'] = fetched[1]
    record['originalText'] = fetched[0]

    full_text = record['title'] + ',' + record['originalText']
    record['disasterid'] = disasterNB(full_text)  # news category
    if record['disasterid'] == '0':
        # Category '0': the entry is not stored.
        return

    # Only indices 0..2 (place, longitude, latitude) are used here.
    located = placeSingle(full_text)
    record.update({
        'place': located[0],  # place of occurrence
        'longitude': str(located[1]),  # longitude of the place
        'latitude': str(located[2]),  # latitude of the place
        'strength': '',  # disaster strength
        'occurTime': record['releaseTime'],  # occurrence time
    })

    deaths = toYc.death(full_text)
    wounded = toYc.Injured(full_text)
    damage = toYc.loss(full_text)
    record.update({
        'loss': str(damage),  # economic loss
        'injured': str(wounded),  # number of injured
        'death': str(deaths),  # number of deaths
        'pictures': fetched[2],  # multiple paths are separated by semicolons
        'more': '',  # special field
        'regional': '国内',  # region where the news was published
        'province': '',  # first-level administrative division (left empty)
        'country': '日本',  # country of the disaster (fixed value)
        'current_website': '人民网',  # website the record comes from
        'isreleasetime': '1',  # whether the release time stands in for the occurrence time
        'isrellonandlat': '0',
    })

    summary = {key: record[key] for key in ('title', 'originalText', 'pictures')}

    table = 'comprehensive_ZH001'
    try:
        status = postgreCommand.insertData(record, summary, table)
        if status == 1:
            print(table, '数据插入成功!')
        elif status == 0:
            print(table, '数据更新成功!')
    except Exception as err:
        print("插入数据失败", str(err))
Ejemplo n.º 8
0
def analyzeInfo(item):
    """Build a record from one table row and hand it to the DB layer.

    The article link is the stripped text of the row's first ``<td>``. Title,
    source, release time, article text and pictures come from
    ``get_original`` (indices 0..4). Location fields come from
    ``address.placeSingle`` and casualty/loss figures from ``toYc``. The
    finished record is passed to ``postgreCommand.insertData`` under the title
    ``'tsunami_ZH003'``.

    Args:
        item: Parsed HTML node of the row. It must provide ``find_all``
            (presumably a BeautifulSoup tag -- confirm with the caller).

    Returns:
        None. The outcome of the insert is reported with ``print``.
    """
    cells = item.find_all('td', limit=1)
    record = {'link': cells[0].get_text().strip()}  # news link

    fetched = get_original(record['link'])
    record.update({
        'title': fetched[0],  # news title
        'source': fetched[1],  # news source
        'releaseTime': fetched[2],  # release time
        'originalText': fetched[3],  # original article text
        'disasterid': '10205',  # disaster type
        'pictures': fetched[4],  # news pictures
    })

    full_text = record['title'] + ',' + record['originalText']
    # Indices 0..4 of the returned sequence are used below as
    # place, longitude, latitude, province and country.
    located = address.placeSingle(full_text)
    record.update({
        'place': located[0],  # place of occurrence
        'longitude': str(located[1]),  # longitude of the place
        'latitude': str(located[2]),  # latitude of the place
    })

    deaths = toYc.death(full_text)
    wounded = toYc.Injured(full_text)
    damage = toYc.loss(full_text)
    record.update({
        'loss': str(damage),  # economic loss
        'injured': str(wounded),  # number of injured
        'death': str(deaths),  # number of deaths
        'province': located[3],  # first-level administrative division
        'country': located[4],  # country of the disaster
        'strength': '',
        'occurTime': record['releaseTime'],  # release time reused as occurrence time
        'more': '',  # special field
        'regional': '国内',  # region where the news was published
        'current_website': '大众网',  # website the record comes from
        'isreleasetime': '1',  # whether the release time stands in for the occurrence time
        'isrellonandlat': '1',
    })

    summary = {key: record[key] for key in ('title', 'originalText', 'pictures')}

    table = 'tsunami_ZH003'
    try:
        status = postgreCommand.insertData(record, summary, table)
        if status == 1:
            print(table, '数据插入成功!')
        elif status == 0:
            print(table, '数据更新成功!')
    except Exception as err:
        print("插入数据失败", str(err))