Esempio n. 1
0
    def get_weather_data(self):
        """Download and store monthly weather pages for every indexed county.

        For each key of ``self.county_index`` and each year 2011-2018, the
        twelve monthly URLs built by ``self.make_weather_url`` are fetched
        with ``common_fun.get_url_text`` and passed to ``self.analyze_data``;
        ``self.updata_to_mysql`` is called once per (key, year).  Fetches that
        come back as ``503`` are queued and attempted one more time after the
        main pass.  A start and an end timestamp are printed.
        """
        time_format = "%Y-%m-%d %H:%M:%S"
        print('start_time', time.strftime(time_format, time.localtime()))

        retry_url = []
        for key in self.county_index.keys():
            for year in range(2011, 2019):
                # One-second pause before requesting each year's 12 months.
                time.sleep(1)
                for month in range(1, 13):
                    url = self.make_weather_url(key, year, month)
                    if url == '':
                        continue
                    res_text = common_fun.get_url_text(url, 'error.log')
                    if res_text == '':
                        # Nothing usable came back (presumably a failed
                        # fetch -- verify against get_url_text).
                        continue
                    if res_text == 503:
                        # Remember the URL so it can be retried once below.
                        retry_url.append({'key': key, 'url': url})
                        continue
                    self.analyze_data(res_text)
                self.updata_to_mysql()

        for retry_data in retry_url:
            print(retry_data)
            res_text = common_fun.get_url_text(retry_data['url'], 'error.log')
            # A repeated 503 has to be skipped as well; otherwise the integer
            # status would be handed to analyze_data in place of page text.
            if res_text == '' or res_text == 503:
                continue
            self.analyze_data(res_text)
            self.updata_to_mysql()

        print('end_time', time.strftime(time_format, time.localtime()))
Esempio n. 2
0
    def city_index(self):
        """Populate ``self.county_index`` from the 2345.com city-select script.

        Downloads ``citySelectData.js``, keeps the text after
        ``var provqx=new Array();`` and strips the ``provqx[10]=`` ..
        ``provqx[43]=`` assignments, newlines and list quoting.  The remainder
        is split on carriage returns, then commas, then ``|``; each resulting
        entry is split on ``-`` and spaces.  When an entry's first and fourth
        fields are equal (single quotes ignored), its third field is mapped to
        the first field in ``self.county_index``.  Entries with fewer than
        four fields are printed and skipped.  Finally the collected keys and
        their count are written to ``citys.txt``.

        NOTE(review): the download result is used as text without checking
        it; a non-string result or a response without the marker raises.
        """
        url = 'http://tianqi.2345.com/js/citySelectData.js'
        res_str = common_fun.get_url_text(url, 'error.log')
        new_str = res_str.split('var provqx=new Array();')[1]
        for i in range(10, 44):
            new_str = new_str.replace('provqx[' + str(i) + ']=', '')

        new_str = new_str.replace('\n', '')
        new_str = new_str.replace('[\'', '').replace('\']', '')

        # Compiled once instead of being looked up for every county entry.
        field_sep = re.compile('[- ]')

        for province in new_str.split('\r'):
            for citys in province.split(','):
                for county in citys.split('|'):
                    fields = field_sep.split(county)
                    if len(fields) < 4:
                        # Malformed entry: report it and move on.  This is an
                        # explicit check rather than a bare ``except`` so that
                        # unrelated errors are no longer swallowed.
                        print(fields)
                        continue
                    first = fields[0].replace('\'', '')
                    if first == fields[3].replace('\'', ''):
                        self.county_index[fields[2]] = first

        with open('citys.txt', 'w') as f:
            f.write(str(self.county_index.keys()))
            f.write(str(len(self.county_index.keys())))
    def get_list_county_data(self):
        """Download and store weather pages for the selected foreign cities.

        Walks every city record in ``self.foreign_citys`` and, for each entry
        of ``self.county_list`` contained in the record's ``'cn_county'``
        value, prints a timestamp and checks ``weather_data_other`` for rows
        with that ``city_name_en``.  Cities that already have rows are
        skipped.  Otherwise the monthly URLs for 2011-2018 built by
        ``self.make_foreign_url`` are fetched with
        ``common_fun.get_url_text`` and passed to ``self.analyze_data``, with
        ``self.updata_to_mysql`` called once per year.  Fetches that come back
        as ``503`` are queued and attempted one more time at the end.
        """
        retry_url = []
        for key, cities in self.foreign_citys.items():
            for city_data in cities:
                for one_county in self.county_list:
                    if one_county not in city_data['cn_county']:
                        continue

                    recordDate = time.strftime("%Y-%m-%d %H:%M:%S",
                                               time.localtime())
                    print(recordDate, one_county)

                    # NOTE(review): this query is built by concatenating a
                    # downloaded city name into the SQL text.  Switch to a
                    # parameterized query if self.db supports one.
                    select_str = (
                        'SELECT * FROM weather_data_other WHERE city_name_en = "'
                        + city_data['city_name'] + '"')
                    res_sel = self.db.select(select_str)
                    if len(res_sel) > 0:
                        # Rows already exist for this city; skip the download.
                        continue

                    for year in range(2011, 2019):
                        for month in range(1, 13):
                            url = self.make_foreign_url(
                                key, city_data['city_name'], year, month)
                            res_text = common_fun.get_url_text(
                                url, 'foreign_err.log')
                            if res_text == '':
                                continue
                            if res_text == 503:
                                # Keep what is needed to retry this URL later.
                                retry_url.append({
                                    'key': key,
                                    'url': url,
                                    'city_data': city_data,
                                })
                                continue
                            self.analyze_data(res_text, key, city_data)

                        self.updata_to_mysql()

        for retry_data in retry_url:
            res_text = common_fun.get_url_text(retry_data['url'],
                                               'foreign_err.log')
            # A repeated 503 has to be skipped as well; otherwise the integer
            # status would be handed to analyze_data in place of page text.
            if res_text == '' or res_text == 503:
                continue
            self.analyze_data(res_text, retry_data['key'],
                              retry_data['city_data'])
            self.updata_to_mysql()
    def foreign_city_index(self):
        """Rebuild ``self.foreign_citys`` from the international city script.

        Downloads ``interCitySelectData.js`` and keeps the text after
        ``var city = [];``.  Each ``;``-separated statement of at least ten
        characters is read as ``city['<state>']=<list>``: the state name
        becomes a key of ``self.foreign_citys`` and the list text, with
        double quotes and square brackets removed, is split on ``,`` and
        ``|`` into city entries.  Entries shorter than four characters are
        ignored.  Each remaining entry is split on spaces; the first token
        minus its last two characters is stored as ``'city_name'`` (with
        single quotes backslash-escaped) and the second token as
        ``'cn_county'``.  Only the first entry per ``'city_name'`` is kept
        within a state.

        NOTE(review): the download result and the entry layout are not
        validated; a non-string result, a missing marker or an entry without
        a space raises.
        """
        self.foreign_citys = {}
        url = 'http://tianqi.2345.com/js/interCitySelectData.js'
        res_str = common_fun.get_url_text(url, 'foreign_err.log')
        new_str = res_str.split('var city = [];')[1]

        for state_data in new_str.split(';'):
            if len(state_data) < 10:
                continue

            # "city['<state>']=<list>" -> ["<state>", "<list>"], split once.
            pieces = state_data.replace('city[\'', '').split('\']=')
            state = pieces[0].strip()
            self.foreign_citys[state] = []
            listing = pieces[1].strip()
            listing = listing.replace('\"', '').replace('[', '').replace(']', '')

            # Names already stored for this state.  A set keeps the duplicate
            # check O(1) and makes a second scan of the stored dicts
            # unnecessary.
            seen_en = set()

            for county_data in listing.split(','):
                for city_data in county_data.split('|'):
                    if len(city_data) < 4:
                        continue
                    city_split = city_data.split(' ')
                    # Drop the last two characters of the first token
                    # (presumably a separator plus marker -- TODO confirm).
                    city_name_en = city_split[0][:-2]
                    city_name_cn = city_split[1]
                    # Backslash-escape single quotes in the stored name.
                    city_name_en = city_name_en.replace('\'', '\\\'')

                    if city_name_en in seen_en:
                        continue
                    seen_en.add(city_name_en)
                    self.foreign_citys[state].append({
                        'city_name': city_name_en,
                        'cn_county': city_name_cn,
                    })