Beispiel #1
0
 def __open_or_create(self):
     """Load the stored configuration, creating it with defaults when absent.

     Creates ``self.config_folder`` if it is missing. When the config file
     does not exist yet, the ``LastPrediction``, ``LastParse`` and ``LastFit``
     sections are added: ``LastFit`` gets the current timestamp, the other two
     are back-dated by one day, and the result is saved via ``__save_config``.
     Otherwise the existing file is read into ``self.config``.
     """
     if not path.exists(self.config_folder):
         makedirs(self.config_folder)

     config_path = self.config_folder + self.config_name
     if path.exists(config_path):
         self.config.read(config_path)
         return

     # First run: populate the defaults, keeping the section order stable.
     for section in ('LastPrediction', 'LastParse', 'LastFit'):
         self.config.add_section(section)

     yesterday = get_date_now() - timedelta(days=1)
     self.config.set('LastFit', 'date', str(get_date_now().timestamp()))
     for section in ('LastParse', 'LastPrediction'):
         self.config.set(section, 'date', str(yesterday.timestamp()))
     self.__save_config()
Beispiel #2
0
    def fit_all_old(self, back_days=7):
        """Fine-tune every existing single-station model.

        Does nothing when fewer than ``back_days`` whole days have passed since
        the last-fit date stored in the config. Otherwise ``fit_old`` is called
        for each pollution type / station pair whose ``.h5`` model file exists
        in ``self.single_folder``, and the stored last-fit date is then set to
        the current time.

        :param back_days: minimum number of days between fine-tuning runs; it
            is also forwarded to ``fit_old`` as its ``back_days`` argument
        """
        last_fit_date = date_from_timestamp(float(self.config.get_last_fit_date()))
        if (get_date_now() - last_fit_date).days < back_days:
            return

        stations = get_stations()
        pollutions = [p.type for p in get_all_pollution_types()]
        for pollution in pollutions:
            for station in stations:
                model_name = f'{pollution}_mg_m3_{station.mosecom_id}'
                if not path.exists(self.single_folder + model_name + '.h5'):
                    continue
                # Forward back_days so the training window matches the interval
                # between runs instead of silently using fit_old's own default.
                self.fit_old(model_name, station.mosecom_id, station.id, pollution,
                             back_days=back_days)

        self.config.set_last_fit_date(str(get_date_now().timestamp()))
Beispiel #3
0
def run_prediction():
    """Run all predictors when data was parsed after the last prediction.

    Compares the last-prediction and last-parse dates stored in the config.
    When the last parse is newer, the single, neighborhood and aggregator
    predictors are run for 3 hours ahead, existing models are fine-tuned, and
    the last-prediction date is set to the current time.
    """
    settings = Config()
    predicted_at = date_from_timestamp(float(settings.get_last_prediction_date()))
    parsed_at = date_from_timestamp(float(settings.get_last_parse_date()))
    if predicted_at >= parsed_at:
        # Nothing new has been parsed since the previous prediction run.
        return

    for predictor in (single_predictor, neighborhood_predictor, aggregator_predictor):
        predictor.predict(predict_range=3)
    fit_predictors.fit_all_old(back_days=7)

    settings.set_last_prediction_date(str(get_date_now().timestamp()))
Beispiel #4
0
    def predict(self, predict_range: int = 1):
        """Forecast pollution for every station ``predict_range`` hours ahead.

        For each station and each pollution type that has a current reading,
        the matching model is preloaded and hourly predictions are chained:
        every step receives the value produced by the previous step. Each
        prediction is stored with ``add_pollution_forecast`` under the
        ``'single'`` predictor. Nothing is returned.

        :param predict_range: number of hourly forecast steps to produce
        """
        date = get_date_now()
        for station in get_stations():
            station_name = station.to_dict()['mosecom_id']

            # Current weather plus one weather forecast per requested hour.
            weather = get_weather(station)[0].to_dict()
            weather_forecast = []
            for hour in range(1, predict_range + 1):
                weather_forecast.append(get_weather_forecast(station, hour)[0].to_dict())

            for pollution_type in get_all_pollution_types():
                current = get_pollutions(station=station, pollution_type=pollution_type)
                if not current:
                    continue

                # Slot 0 holds the latest reading; slots 1.. receive the forecasts.
                predictions = [None] * (predict_range + 1)
                predictions[0] = current[0].to_dict()['mg_m3_value']

                try:
                    self.preload_model(pollution_type.to_dict()['type'], station_name)
                except Exception:
                    # TODO: Logger
                    continue

                for hour, forecast in enumerate(weather_forecast, start=1):
                    forecast_value = self.__make_predict(weather, forecast, predictions[hour - 1])
                    predictions[hour] = forecast_value
                    # Persist the forecast for this hour offset.
                    add_pollution_forecast(station=station, date=date, delta_hour=hour, predictor='single',
                                           pollution_type=pollution_type, value=forecast_value)
Beispiel #5
0
    def fit_old(self, model_name: str, station_name: str, station_id: int, pollution_type: str, back_days=7):
        """Fine-tune one existing model on the data of the last ``back_days`` days.

        Each training row pairs a pollution reading with the weather and the
        weather forecast observed in the same hour and with the pollution
        reading of the following hour. The second DataFrame column is used as
        the training target. The current model is archived to
        ``self.archive_folder`` before the fine-tuned model overwrites
        ``<single_folder><model_name>.h5``. Returns without touching any model
        when no complete training rows remain.

        @param model_name: name of the existing model file (without extension)
        @param station_name: station name used to preload the model
        @param station_id: id of the model's station
        @param pollution_type: pollutant type
        @param back_days: number of past days to train on
        """
        def hour_of(moment):
            # Match on the full date + hour; day-of-month alone collides once
            # the window spans more than a month.
            return moment.replace(minute=0, second=0, microsecond=0)

        def first_per_hour(records, field):
            # Keep the first record seen for every hour (same pick as a linear scan).
            index = {}
            for record in records:
                index.setdefault(hour_of(record[field]), record)
            return index

        back_time = get_date_now() - timedelta(days=back_days)
        back_pollution = Pollution.select(lambda p: p.datetime >= back_time and
                                                    p.station_id.id == station_id and
                                                    p.pollution_type.type == pollution_type)
        back_pollution_list = [p.to_dict() for p in back_pollution]

        back_weather = Weather.select(lambda w: w.datetime >= back_time and w.station_id.id == station_id)
        back_weather_list = [w.to_dict() for w in back_weather]

        back_weather_forecast = WeatherForecast.select(lambda wf: wf.obs_datetime >= back_time and
                                                                  wf.station_id.id == station_id)
        back_weather_forecast_list = [wf.to_dict() for wf in back_weather_forecast]

        weather_by_hour = first_per_hour(back_weather_list, 'datetime')
        forecast_by_hour = first_per_hour(back_weather_forecast_list, 'obs_datetime')
        pollution_by_hour = first_per_hour(back_pollution_list, 'datetime')

        df = pd.DataFrame([], columns=SINGLE_COLUMNS)
        for pollution in back_pollution_list:
            current_hour = hour_of(pollution['datetime'])

            weather = weather_by_hour.get(current_hour)
            weather_forecast = forecast_by_hour.get(current_hour)
            # The target is the reading one hour later; looking up the same
            # hour would make the model learn to echo its own input.
            next_pollution = pollution_by_hour.get(current_hour + timedelta(hours=1))

            if weather and weather_forecast and next_pollution:
                merged = merge_auto_fit_data(weather, weather_forecast, pollution['mg_m3_value'],
                                             next_pollution['mg_m3_value'])
                # NOTE(review): DataFrame.append was removed in pandas 2.0; switch to
                # pd.concat once the return type of merge_auto_fit_data is confirmed.
                df = df.append(merged, ignore_index=True)

        df = predict_mapping_df(df)
        df = df.dropna()
        df = df.astype(float)

        if df.empty:
            return

        self.model_folder = self.single_folder
        self.scaler_folder = self.single_minmaxscaler_folder
        self.preload_model(pollution_type, station_name)

        # Column 1 is the target; everything else is the feature matrix.
        y_data = pd.DataFrame(df[df.columns[1]])
        y_data = pd.DataFrame(self.predictor.yscaler.transform(y_data))

        x_data = df.drop(df.columns[1], axis=1)
        x_data = pd.DataFrame(self.predictor.xscaler.transform(x_data))

        # Archive the current model before it is overwritten.
        archive_dir = self.archive_folder
        if not path.exists(archive_dir):
            makedirs(archive_dir)
        archive_name = f'{archive_dir}{model_name}_{get_date_now().strftime("%Y-%m-%d_%H-%M-%S")}.h5'
        self.predictor.model.save(archive_name)

        model = self.__additional_training(self.predictor.model, x_data, y_data)
        print(model_name)
        model.save(f'{self.single_folder}{model_name}.h5')
Beispiel #6
0
    def predict(self, predict_range: int = 1):
        """Forecast pollution with the neighborhood model ``predict_range`` hours ahead.

        For every station listed in ``STATION_NEIGHBORHOOD`` that resolves to an
        entity, the station's weather data is combined with the weather and the
        ``'single'`` pollution forecasts of its neighbors. Hourly predictions
        are chained (each step receives the previous step's value) and stored
        with ``add_pollution_forecast`` under the ``'neighborhood'`` predictor.
        A pollution type is skipped when the station has no current reading,
        the model cannot be preloaded, or any neighbor lacks a complete series
        of ``predict_range + 1`` values. Nothing is returned.

        :param predict_range: number of hourly forecast steps to produce
        """
        date = get_date_now()
        for station_name, neighbors in STATION_NEIGHBORHOOD.items():
            station_entity = get_station_by_name(station_name)
            if not station_entity:
                continue

            # Weather and weather forecast of the station itself.
            weather = get_weather(station_entity)[0].to_dict()
            weather_forecast = [
                get_weather_forecast(station_entity, hour)[0].to_dict()
                for hour in range(1, predict_range + 1)
            ]

            # Resolve each neighbor once and reuse the entity for every
            # pollution type below.
            neighbor_entities = {
                neighbor_station: get_station_by_name(neighbor_station)
                for neighbor_station in neighbors
            }

            # Structure: neighbor_station -> [weather, [forecast_weather, ...]]
            neighbor_weather_dict = {}
            for neighbor_station, neighbor_entity in neighbor_entities.items():
                if neighbor_entity:
                    neighbor_weather_dict[neighbor_station] = [
                        get_weather(neighbor_entity)[0].to_dict(),
                        [
                            # Forecast of the neighbor, not of the central station.
                            get_weather_forecast(neighbor_entity, hour)[0].to_dict()
                            for hour in range(1, predict_range + 1)
                        ],
                    ]

            for pollution_type in get_all_pollution_types():
                # Current pollution at the station.
                value = get_pollutions(station=station_entity,
                                       pollution_type=pollution_type)
                if not value:
                    continue

                # Slot 0 holds the latest reading; slots 1.. receive the forecasts.
                predictions = [None for _ in range(predict_range + 1)]
                predictions[0] = value[0].to_dict()['mg_m3_value']

                try:
                    self.preload_model(
                        pollution_type.to_dict()['type'], station_name)
                except Exception:
                    # TODO: Logger
                    continue

                # Structure: neighbor_station -> [pollution, forecast1, ...]
                neighbor_pollution_dict = {}
                neighbor_bad_pollution_flag = False
                for neighbor_station, neighbor_entity in neighbor_entities.items():
                    # Only query neighbors that resolved to an entity; the
                    # station name itself is always truthy.
                    if neighbor_entity:
                        try:
                            current = get_pollutions(
                                neighbor_entity, pollution_type)[0].to_dict()['mg_m3_value']
                            neighbor_pollution_dict[neighbor_station] = [current] + [
                                f.to_dict()['value']
                                for f in get_pollution_forecast(
                                    neighbor_entity, pollution_type, 'single')
                            ]
                        except IndexError:
                            neighbor_bad_pollution_flag = True

                    # A neighbor is usable only with a full, gap-free series.
                    series = neighbor_pollution_dict.get(neighbor_station)
                    if series is None or None in series or len(series) != predict_range + 1:
                        neighbor_bad_pollution_flag = True

                    if neighbor_bad_pollution_flag:
                        break

                # Skip this pollution type when neighbor data is incomplete.
                if neighbor_bad_pollution_flag:
                    continue

                for i in range(predict_range):
                    neighborhood_data = self.__make_neighborhood_data(
                        neighbor_weather_dict, neighbor_pollution_dict, i)
                    raw_prediction = self.__make_neighborhood_predict(
                        weather, weather_forecast[i], predictions[i],
                        neighborhood_data)
                    predictions[i + 1] = raw_prediction
                    # Persist the forecast for this hour offset.
                    add_pollution_forecast(
                        station=station_entity,
                        date=date,
                        delta_hour=i + 1,
                        predictor='neighborhood',
                        pollution_type=pollution_type,
                        value=raw_prediction)
Beispiel #7
0
def run_parser():
    """Parse new data and record the current time as the last-parse date.

    Extends the database through ``database_extender.auto_extend`` with a
    3-hour forecast range, then stores the current timestamp in the config.
    """
    database_extender.auto_extend(forecast_hour_range=3)
    settings = Config()
    parsed_at = get_date_now().timestamp()
    settings.set_last_parse_date(str(parsed_at))
Beispiel #8
0
    def predict(self, predict_range: int = 1):
        """Forecast pollution with the aggregating model ``predict_range`` hours ahead.

        For every station listed in ``STATION_NEIGHBORHOOD`` that resolves to an
        entity, the stored ``'single'`` and ``'neighborhood'`` forecasts are
        combined with the station's weather data and its neighbors' weather.
        Hourly predictions are chained (each step receives the previous step's
        value) and stored with ``add_pollution_forecast`` under the
        ``'aggregator'`` predictor. A pollution type is skipped when the
        current reading or either component forecast is missing or shorter
        than ``predict_range``, or when the model cannot be preloaded. Nothing
        is returned.

        :param predict_range: number of hourly forecast steps to produce
        """
        date = get_date_now()
        for station_name, neighbors in STATION_NEIGHBORHOOD.items():
            station_entity = get_station_by_name(station_name)
            if not station_entity:
                continue

            # Weather and weather forecast of the station itself.
            weather = get_weather(station_entity)[0].to_dict()
            weather_forecast = [
                get_weather_forecast(station_entity, hour)[0].to_dict()
                for hour in range(1, predict_range + 1)
            ]

            # Structure: neighbor_station -> [weather, [forecast_weather, ...]]
            neighbor_weather_dict = {}
            for neighbor_station in neighbors:
                neighbor_station_entity = get_station_by_name(neighbor_station)
                if neighbor_station_entity:
                    neighbor_weather_dict[neighbor_station] = [
                        get_weather(neighbor_station_entity)[0].to_dict(),
                        [
                            # Forecast of the neighbor, not of the central station.
                            get_weather_forecast(neighbor_station_entity,
                                                 hour)[0].to_dict()
                            for hour in range(1, predict_range + 1)
                        ],
                    ]

            for pollution_type in get_all_pollution_types():
                # Forecasts already produced by the first and second components.
                single_value = [
                    p.to_dict()['value'] for p in get_pollution_forecast(
                        station_entity, pollution_type, 'single')
                ]
                neighbor_value = [
                    p.to_dict()['value'] for p in get_pollution_forecast(
                        station_entity, pollution_type, 'neighborhood')
                ]

                # Current pollution at the station.
                value = get_pollutions(station=station_entity,
                                       pollution_type=pollution_type)

                if not (single_value and neighbor_value and value):
                    continue
                # Both component forecasts are indexed up to predict_range - 1
                # below; skip up front rather than fail after partial writes.
                if len(single_value) < predict_range or len(neighbor_value) < predict_range:
                    continue

                # Slot 0 holds the latest reading; slots 1.. receive the forecasts.
                predictions = [None for _ in range(predict_range + 1)]
                predictions[0] = value[0].to_dict()['mg_m3_value']

                try:
                    self.preload_model(
                        pollution_type.to_dict()['type'], station_name)
                except Exception:
                    # TODO: Logger
                    continue

                for i in range(predict_range):
                    neighborhood_data = self.__make_neighborhood_data(
                        neighbor_weather_dict, i)
                    raw_prediction = self.__make_aggregator_predict(
                        predictions[i], weather, weather_forecast[i],
                        [single_value[i], neighbor_value[i]],
                        neighborhood_data)
                    predictions[i + 1] = raw_prediction
                    # Persist the forecast for this hour offset.
                    add_pollution_forecast(
                        station=station_entity,
                        date=date,
                        delta_hour=i + 1,
                        predictor='aggregator',
                        pollution_type=pollution_type,
                        value=raw_prediction)