def get_citylist_to_mongo(citylist_xml):
    """Parse the city-list XML and persist each city to MongoDB.

    Every city is a <d> element whose attributes d1..d4 carry code, name,
    pinyin and province.  Iteration stops once the foreign section of the
    list is reached (first entry whose province is '韩国').
    """
    db = DB_Utils()
    parsed = BeautifulSoup(citylist_xml, 'lxml')
    for node in parsed.select('d'):
        record = {
            'province': node.get('d4'),
            'city_name': node.get('d2'),
            'city_code': node.get('d1'),
            'city_piny': node.get('d3'),
        }
        # the tail of the list holds foreign cities; stop before storing them
        if node.get('d4') == '韩国':
            break
        db.save_one_to_mongo('citylist', record)
        print('正在将城市列表存储到mongodb: #', node.get('d4'), node.get('d2'))
def get_1day_weather_data(web_data):
    """Extract the 24-hour forecast from a weather page and store it in MongoDB.

    Args:
        web_data: raw HTML of a weather.com.cn city page.

    Side effects: saves one document to the '24hour_weather' collection and
    prints progress; any exception is caught and printed (best-effort scrape).
    """
    soup = BeautifulSoup(web_data, 'lxml')
    html = etree.HTML(web_data)
    try:
        scripts = html.xpath('//script')
        # breadcrumb element carries the "province > city" label
        crumbs = soup.select('.ctop .crumbs')
        db_utils = DB_Utils()
        for script in scripts:
            # only the inline script mentioning 'hour3data' holds the payload;
            # str() guards against scripts whose .text is None
            if 'hour3data' not in str(script.text):
                continue
            print(script.text)
            # script text looks like 'var hour3data = {...}' -- JSON after '='
            payload = json.loads(script.text.split('=')[-1])
            city = crumbs[0].text.replace('\n', '').replace(' ', '')
            print(city, '实时天气:')
            hourly = []
            for entry in payload['1d']:
                # split once instead of five times per entry;
                # keep time, weather, temperature, wind direction, wind power
                fields = entry.split(',')
                row = [fields[0], fields[2], fields[3], fields[4], fields[5]]
                print(row)
                hourly.append(row)
            weather_data = {'city': city, '24hour_weather': hourly}
            db_utils.save_one_to_mongo('24hour_weather', weather_data)
            print('save to mongo')
    except Exception as e:
        # best-effort: report the failure and let the caller move on
        print(e)
def get_1day_weather_data(web_data):
    """Extract the 24-hour forecast from a weather page and store it in MongoDB.

    Soup-only variant (no lxml.etree).  Args:
        web_data: raw HTML of a weather.com.cn city page.

    Side effects: saves one document to the '24hour_weather' collection and
    prints progress as it goes.
    """
    soup = BeautifulSoup(web_data, 'lxml')
    scripts = soup.select('script')
    # breadcrumb element carries the "province > city" label
    crumbs = soup.select('.ctop .crumbs')
    db_utils = DB_Utils()
    for script in scripts:
        # only the inline script mentioning 'hour3data' holds the payload
        if 'hour3data' not in str(script):
            continue
        # script text looks like 'var hour3data = {...}' -- JSON after '='
        payload = json.loads(script.text.split('=')[-1])
        city = crumbs[0].text.replace('\n', '').replace(' ', '')
        print(city, '实时天气:')
        hourly = []
        for entry in payload['1d']:
            # split once instead of five times per entry;
            # keep time, weather, temperature, wind direction, wind power
            fields = entry.split(',')
            row = [fields[0], fields[2], fields[3], fields[4], fields[5]]
            print(row)
            hourly.append(row)
        weather_data = {'city': city, '24hour_weather': hourly}
        db_utils.save_one_to_mongo('24hour_weather', weather_data)
        print('save to mongo')
# NOTE(review): the two statements below are the tail of a function whose
# header is not visible in this chunk -- indentation level is assumed.
# TODO confirm against the full file.
f.write('}')
f.close()


# Parse the city-list XML with BeautifulSoup, extract city info and store it
# in MongoDB.  (translated from the original Chinese comment)
def get_citylist_to_mongo(citylist_xml):
    """Save every <d> city node from the city-list XML to the 'citylist'
    MongoDB collection, stopping once the foreign section begins."""
    save_to_db = DB_Utils()
    soup = BeautifulSoup(citylist_xml, 'lxml')
    #print(soup.prettify())
    city_info = soup.select('d')  # each city is a <d> element with d1..d4 attrs
    for i in city_info:
        data = {
            'province': i.get('d4'),   # d4: province (or country for foreign rows)
            'city_name': i.get('d2'),  # d2: display name
            'city_code': i.get('d1'),  # d1: numeric city code
            'city_piny': i.get('d3')   # d3: pinyin spelling
        }
        # the tail of the list holds foreign cities; stop before storing them
        if i.get('d4') == '韩国':
            break
        save_to_db.save_one_to_mongo('citylist', data)
        print('正在将城市列表存储到mongodb: #', i.get('d4'), i.get('d2'))


if __name__ == "__main__":
    #url = CITYLIST_URL
    db_utils = DB_Utils()
    # rebuild the collection from scratch on every run
    db_utils.drop_collection('citylist')
    url = 'http://mobile.weather.com.cn/js/citylist.xml'
    citylist_xml = get_citylist_xml(url)
    get_citylist_to_mongo(citylist_xml)
'''
author : DannyWu
site : www.idannywu.com
'''
from db import DB_Utils
from get_weather import *

# Query the stored daily weather for one city and pretty-print it.
db1 = DB_Utils()
query = {'city_name': '蚌埠'}
rows = db1.query_of_arg('1day_weather_data', query)

# column headers for the 24-hour table
hourly_header = ['1', '2', '3', '4', '5']

for record in rows:
    print(record.get('city'))
    # day/night summary first, then the hourly table
    summary = [record.get('day_wea'), record.get('night_wea')]
    pretty_print(summary, ['1', '2', '3', '4'])
    print('24小时实时天气:')
    pretty_print(record.get('24hour_weather'), hourly_header)
#print(i.split(',')[4], end=' ') #print(i.split(',')[5]) data3 = [ i.split(',')[0], i.split(',')[2], i.split(',')[3], i.split(',')[4], i.split(',')[5] ] print(data3) data4.append(data3) weather_data = {'city': city, '24hour_weather': data4} db_utils.save_one_to_mongo('24hour_weather', weather_data) print('save to mongo') if __name__ == "__main__": db_util = DB_Utils() db_util.drop_collection('24hour_weather') web_data_list = [] for key in cities: url = get_city_weather_url(key) web_data = get_weather_html(url) get_1day_weather_data(web_data) #web_data_list.append(web_data) #get_weather_data(web_data) #pool = Pool(1) #pool.map(get_1day_weather_data,web_data_list) #pool.close()
def get_1day_weather_data(province, city_name, city_code, city_piny, web_data):
    """Parse one city's weather page and store today's data in MongoDB.

    Args:
        province, city_name, city_code, city_piny: city identity fields,
            carried through into the stored document unchanged.
        web_data: raw HTML of the city's weather.com.cn page.

    Side effects: saves one document to '1day_weather_data'; on any parse
    failure, records the city in the 'error' collection instead of raising.
    """
    try:
        soup = BeautifulSoup(web_data, 'lxml')
        scripts = soup.select('script')
        address = soup.select('.ctop .crumbs')
        # hoist the repeated selects: index [0] is daytime, [1] is nighttime
        wea = soup.select('.t .clearfix .wea')
        tem = soup.select('.t .clearfix .tem')
        win = soup.select('.t .clearfix .win span')
        day_wea = wea[0].text
        night_wea = wea[1].text
        day_tem = tem[0].text.replace('\n', '')
        night_tem = tem[1].text.replace('\n', '')
        # wind cell: full direction lives in the title attribute, force in text
        day_win = win[0].get('title') + win[0].text
        night_win = win[1].get('title') + win[1].text
        sun_up = soup.select('.t .clearfix .sunUp')[0].text.replace('\n', '')
        sun_down = soup.select('.t .clearfix .sunDown')[0].text.replace('\n', '')
        day_wea_list = [day_wea, day_tem, day_win, sun_up]
        night_wea_list = [night_wea, night_tem, night_win, sun_down]
        print(day_wea_list)
        print(night_wea_list)
        db_utils = DB_Utils()
        for script in scripts:
            # only the inline script mentioning 'hour3data' holds the payload
            if 'hour3data' not in str(script):
                continue
            # script text looks like 'var hour3data = {...}' -- JSON after '='
            payload = json.loads(script.text.split('=')[-1])
            city = address[0].text.replace('\n', '').replace(' ', '')
            print(city, '实时天气:')
            hourly = []
            for entry in payload['1d']:
                # split once instead of five times per entry;
                # keep time, weather, temperature, wind direction, wind power
                fields = entry.split(',')
                row = [fields[0], fields[2], fields[3], fields[4], fields[5]]
                print(row)
                hourly.append(row)
            weather_data_of_1day = {
                'province': province,
                'city_name': city_name,
                'city_code': city_code,
                'city_piny': city_piny,
                'city': city,
                'day_wea': day_wea_list,
                'night_wea': night_wea_list,
                '24hour_weather': hourly
            }
            db_utils.save_one_to_mongo('1day_weather_data',
                                       weather_data_of_1day)
            print('save to mongo')
    except Exception:
        # narrowed from a bare 'except:' so Ctrl-C/SystemExit still abort the
        # batch; record the failing city for a later retry instead of crashing
        print(province, city_name, city_code)
        db_utils1 = DB_Utils()
        error_city = {
            'province': province,
            'city_name': city_name,
            'city_code': city_code,
        }
        db_utils1.save_one_to_mongo('error', error_city)
        print('save error to mongo')
pass  # NOTE(review): orphan no-op fragment; its enclosing block is not visible here
# NOTE(review): this chunk starts mid-expression -- the first line continues a
# db_utils.save_one_to_mongo('1day_weather_data', ...) call whose opening is
# outside this view; indentation levels are assumed from the full copy of
# get_1day_weather_data elsewhere.  TODO confirm against the full file.
                                       weather_data_of_1day)
            print('save to mongo')
    except:
        # bare except: records the failing city instead of raising
        print(province, city_name, city_code)
        db_utils1 = DB_Utils()
        error_city = {
            'province': province,
            'city_name': city_name,
            'city_code': city_code,
        }
        db_utils1.save_one_to_mongo('error', error_city)
        print('save error to mongo')
        pass


if __name__ == "__main__":
    db_util = DB_Utils()
    # rebuild the daily-weather collection from scratch each run
    db_util.drop_collection('1day_weather_data')
    data = db_util.query_all('citylist')
    for i in data:
        # each citylist document supplies the identity fields passed through
        url = city_code_to_url(i.get('city_code'))
        province = i.get('province')
        city_name = i.get('city_name')
        city_code = i.get('city_code')
        city_piny = i.get('city_piny')
        info = [city_name, city_code, city_piny]
        print(info)
        web_data = get_weather_html(url)
        get_1day_weather_data(province, city_name, city_code, city_piny,
                              web_data)