def get_weather_data(self):
    """Crawl monthly weather pages for every indexed county and store them.

    For each key of ``self.county_index`` and each month of the years
    2011-2018 the page URL is built with ``make_weather_url``, fetched
    with ``common_fun.get_url_text`` and handed to ``analyze_data``.
    An empty URL or an empty response is skipped.  A response equal to
    ``503`` is queued and fetched one more time after the main pass.
    ``updata_to_mysql`` is called after each county and once more after
    the retry pass.  Start and end timestamps are printed to stdout.

    NOTE(review): the nesting of this method was reconstructed from a
    source whose whitespace had been collapsed; confirm that the
    per-county ``updata_to_mysql`` call sits at the intended level.
    """
    time_format = "%Y-%m-%d %H:%M:%S"
    print('start_time', time.strftime(time_format, time.localtime()))

    retry_url = []
    for key in self.county_index:
        for year in range(2011, 2019):
            # One-second pause before each year's batch of requests
            # (presumably to throttle the crawl -- verify).
            time.sleep(1)
            for month in range(1, 13):
                url = self.make_weather_url(key, year, month)
                if url == '':
                    continue
                res_text = common_fun.get_url_text(url, 'error.log')
                if res_text == '':
                    continue
                if res_text == 503:
                    # Remember the request so it can be repeated later.
                    retry_url.append({'key': key, 'url': url})
                    continue
                self.analyze_data(res_text)
        self.updata_to_mysql()

    for retry_data in retry_url:
        print(retry_data)
        res_text = common_fun.get_url_text(retry_data['url'], 'error.log')
        # The retry can fail again; a second 503 is an int, not page
        # text, and must not be passed on to analyze_data.
        if res_text in ('', 503):
            continue
        self.analyze_data(res_text)
    self.updata_to_mysql()

    print('end_time', time.strftime(time_format, time.localtime()))
def city_index(self):
    """Download the city selection script and fill ``self.county_index``.

    The text after ``var provqx=new Array();`` is stripped of the
    ``provqx[10]=`` .. ``provqx[43]=`` prefixes, newlines and the
    ``['`` / ``']`` delimiters, then split on ``\\r``, ``,`` and ``|``.
    Each resulting entry is split on ``-`` and space; when its first and
    fourth fields are equal (quotes removed) the third field is stored
    as key with the first field as value.  Entries with fewer than four
    fields are printed and skipped.  Finally the collected keys and
    their count are written to ``citys.txt``.

    NOTE(review): the nesting was reconstructed from a source whose
    whitespace had been collapsed; the file dump is assumed to run once,
    after all entries have been processed.
    """
    url = 'http://tianqi.2345.com/js/citySelectData.js'
    res_str = common_fun.get_url_text(url, 'error.log')
    new_str = res_str.split('var provqx=new Array();')[1]
    for i in range(10, 44):
        new_str = new_str.replace('provqx[' + str(i) + ']=', '')
    new_str = new_str.replace('\n', '').replace("['", '').replace("']", '')

    for province in new_str.split('\r'):
        for citys in province.split(','):
            for county in citys.split('|'):
                county_str_list = re.split('[- ]', county)
                # Only the index accesses can fail here, so catch exactly
                # that instead of swallowing every exception.
                try:
                    first_field = county_str_list[0].replace("'", '')
                    fourth_field = county_str_list[3].replace("'", '')
                except IndexError:
                    print(county_str_list)
                    continue
                if first_field == fourth_field:
                    self.county_index[county_str_list[2]] = first_field

    with open('citys.txt', 'w') as f:
        f.write(str(self.county_index.keys()))
        f.write(str(len(self.county_index.keys())))
def get_list_county_data(self):
    """Crawl monthly weather pages for the foreign cities in ``county_list``.

    Every city entry of ``self.foreign_citys`` whose ``cn_county`` text
    contains one of the names in ``self.county_list`` is processed,
    unless ``weather_data_other`` already holds rows for its
    ``city_name``.  For each month of 2011-2018 the URL from
    ``make_foreign_url`` is fetched with ``common_fun.get_url_text`` and
    passed to ``analyze_data``.  Empty responses are skipped; responses
    equal to ``503`` are queued and fetched once more after the main
    pass.  ``updata_to_mysql`` is called after each crawled city and
    once more after the retry pass.

    NOTE(review): the nesting was reconstructed from a source whose
    whitespace had been collapsed; confirm that the per-city
    ``updata_to_mysql`` call sits at the intended level.
    """
    retry_url = []
    for key, cities in self.foreign_citys.items():
        for city_data in cities:
            for one_county in self.county_list:
                if one_county not in city_data['cn_county']:
                    continue
                recordDate = time.strftime("%Y-%m-%d %H:%M:%S",
                                           time.localtime())
                print(recordDate, one_county)

                # NOTE(review): the statement is assembled by string
                # concatenation.  Switch to a parameterised query if
                # self.db.select supports bound parameters.
                select_str = ('SELECT * FROM weather_data_other '
                              'WHERE city_name_en = "'
                              + city_data['city_name'] + '"')
                res_sel = self.db.select(select_str)
                if len(res_sel) > 0:
                    # Rows for this city already exist; do not crawl again.
                    continue

                for year in range(2011, 2019):
                    for month in range(1, 13):
                        url = self.make_foreign_url(
                            key, city_data['city_name'], year, month)
                        res_text = common_fun.get_url_text(
                            url, 'foreign_err.log')
                        if res_text == '':
                            continue
                        if res_text == 503:
                            retry_url.append({
                                'key': key,
                                'url': url,
                                'city_data': city_data,
                            })
                            continue
                        self.analyze_data(res_text, key, city_data)
                self.updata_to_mysql()

    for retry_data in retry_url:
        res_text = common_fun.get_url_text(retry_data['url'],
                                           'foreign_err.log')
        # The retry can fail again; a second 503 is an int, not page
        # text, and must not be passed on to analyze_data.
        if res_text in ('', 503):
            continue
        self.analyze_data(res_text, retry_data['key'],
                          retry_data['city_data'])
    self.updata_to_mysql()
def foreign_city_index(self):
    """Download the international city script and build ``self.foreign_citys``.

    The text after ``var city = [];`` is split on ``;``.  Segments
    shorter than ten characters are ignored.  In each remaining segment
    the ``city['`` prefix is removed; the part before ``']=`` becomes
    the state key and the part after it (with ``"``, ``[`` and ``]``
    removed) is split on ``,`` and ``|`` into city entries.  Entries
    shorter than four characters are ignored.  For every other entry the
    first space-separated token, minus its last two characters and with
    ``'`` replaced by ``\\'``, is stored as ``city_name`` and the second
    token as ``cn_county``.  Each state maps to a list of such dicts
    without duplicates.

    Segments without ``']=`` and entries without a space are skipped
    rather than raising ``IndexError``.

    NOTE(review): the nesting was reconstructed from a source whose
    whitespace had been collapsed.  The original also kept a per-state
    list of English names that was never read; it is assumed not to have
    gated the creation of the city dicts and has been dropped -- confirm.
    """
    self.foreign_citys = {}
    url = 'http://tianqi.2345.com/js/interCitySelectData.js'
    res_str = common_fun.get_url_text(url, 'foreign_err.log')
    new_str = res_str.split('var city = [];')[1]

    for state_data in new_str.split(';'):
        if len(state_data) < 10:
            continue
        state_parts = state_data.replace("city['", '').split("']=")
        if len(state_parts) < 2:
            # Not a "city['<state>']=<payload>" assignment.
            continue
        state = state_parts[0].strip()
        payload = state_parts[1].strip()
        payload = payload.replace('"', '').replace('[', '').replace(']', '')

        state_cities = []
        self.foreign_citys[state] = state_cities
        for county_data in payload.split(','):
            for city_data in county_data.split('|'):
                if len(city_data) < 4:
                    continue
                city_split = city_data.split(' ')
                if len(city_split) < 2:
                    # No second token to use as 'cn_county'.
                    continue
                # Drop the last two characters of the first token
                # (presumably a short suffix -- verify against the feed)
                # and backslash-escape single quotes.
                city_name_en = city_split[0][:-2].replace("'", "\\'")
                city_info = {
                    'city_name': city_name_en,
                    'cn_county': city_split[1],
                }
                if city_info not in state_cities:
                    state_cities.append(city_info)