def normalize_temperature_inputs(nn_input: DataFrame) -> DataFrame:
    """Normalize the temperature input columns of ``nn_input`` in place.

    For each prefix ``i`` in 0, 1, 2 the column ``{i}_masl_diff`` is passed
    through ``ku.normalize`` with the bounds -1000 and 1000, and the columns
    ``{i}_min``, ``{i}_mean`` and ``{i}_max`` with the module-level bounds
    ``temperature_lower`` and ``temperature_upper``.

    Args:
        nn_input: Frame holding the columns listed above; it is modified.

    Returns:
        The same ``nn_input`` object, with the columns overwritten.
    """
    for i in range(3):
        masl_column = f'{i}_masl_diff'
        nn_input[masl_column] = ku.normalize(nn_input[masl_column], -1000, 1000)

        # All three temperature statistics share the same bounds.
        temperature_columns = (f'{i}_min', f'{i}_mean', f'{i}_max')
        for column in temperature_columns:
            nn_input[column] = ku.normalize(
                nn_input[column], temperature_lower, temperature_upper)
    return nn_input
Esempio n. 2
0
    def get_as_aggregated(self,
                          grouping_duration=7,
                          years: List[int] = None,
                          normalize_data=True):
        """Return the weather features aggregated over groups of daily columns.

        Daily columns of ``self.weather_data`` are selected by regex and
        combined in consecutive groups of ``grouping_duration`` columns:
        row-wise min of the ``..._min`` columns, max of the ``..._max``
        columns, mean of the ``..._mean`` columns and sum of the
        precipitation columns. The last group is shorter when the number of
        daily columns is not a multiple of ``grouping_duration``.

        Args:
            grouping_duration: Number of consecutive daily columns per group.
                With ``1`` the daily columns are returned unaggregated.
            years: Years to work on. ``__load_from_files(years)`` is called
                when no data is loaded yet or when the list of loaded years
                differs from ``years`` (an order-sensitive list comparison,
                so ``None`` always triggers a reload).
            normalize_data: When true, the features are passed through
                ``normalize``: temperatures with the bounds -30 and 30,
                precipitation with 0 and 10, and the growth start without
                explicit bounds.

        Returns:
            A DataFrame made of ``year`` and ``orgnr``, the result of
            ``self.find_growth_start()`` and the columns ``min_temp<k>``,
            ``max_temp<k>``, ``mean_temp<k>`` and ``total_rain<k>``, where
            ``k`` counts the groups from 0.

        Raises:
            ValueError: If ``grouping_duration`` is smaller than 1.
        """
        # Fail early with a clear message instead of the opaque errors that
        # range()/pd.concat would raise further down.
        if grouping_duration < 1:
            raise ValueError(
                f"grouping_duration must be at least 1, got {grouping_duration}")

        if self.weather_data is None or list(
                self.weather_data.year.unique()) != years:
            self.__load_from_files(years)

        def group_columns(columns, n):
            # Consecutive slices of length n; the final slice may be shorter.
            return [columns[x:x + n] for x in range(0, len(columns), n)]

        def aggregate_cols(dataframe, col_regex, func):
            col_subset = dataframe.filter(regex=col_regex)

            # grouping_duration of 1 day basically means we don't need to aggregate any further
            if grouping_duration == 1:
                col_subset.columns = range(0, len(col_subset.columns))
                return col_subset

            grouped_cols = group_columns(col_subset.columns, grouping_duration)
            aggregated_cols = [func(dataframe[x]) for x in grouped_cols]
            # The aggregated Series are unnamed, so the concatenated columns
            # are numbered 0..k-1 and only get their name from add_prefix.
            return pd.concat(aggregated_cols, axis=1)

        min_temp = aggregate_cols(self.weather_data, 'temperature_day_[0-9]+_min', lambda df: df.min(axis=1)) \
            .add_prefix("min_temp")
        max_temp = aggregate_cols(self.weather_data, 'temperature_day_[0-9]+_max', lambda df: df.max(axis=1)) \
            .add_prefix("max_temp")
        mean_temp = aggregate_cols(self.weather_data, 'temperature_day_[0-9]+_mean', lambda df: df.mean(axis=1)) \
            .add_prefix("mean_temp")
        sum_rain = aggregate_cols(self.weather_data, 'precipitation_day_[0-9]+', lambda df: df.sum(axis=1)) \
            .add_prefix("total_rain")

        identifiers = self.weather_data[['year', 'orgnr']]
        growth_start = self.find_growth_start()

        if normalize_data:
            growth_start = normalize(growth_start)
            min_temp = normalize(min_temp, -30, 30)
            max_temp = normalize(max_temp, -30, 30)
            mean_temp = normalize(mean_temp, -30, 30)
            sum_rain = normalize(sum_rain, 0, 10)

        return pd.concat(
            [
                identifiers,
                growth_start,
                min_temp,
                max_temp,
                mean_temp,
                sum_rain,
            ],
            axis=1)
Esempio n. 3
0
    # NOTE(review): this is a fragment of a larger function whose header is
    # not visible; `data`, `frost` and `kornmo` are defined before this point.

    # Attach the weather features per (year, orgnr). The positional 1 is
    # presumably grouping_duration, i.e. daily values without aggregation —
    # confirm against the class of `frost`.
    weather_data = frost.get_as_aggregated(1)
    data = data.merge(weather_data, on=['year', 'orgnr'])

    # Attach the elevation data, keyed on orgnr only.
    elevation_data = get_farmer_elevation()
    data = data.merge(elevation_data, on=['orgnr'])

    # Attach historical production for the years present in `data`.
    # NOTE(review): the meaning of the trailing 4 is not visible here
    # (possibly the number of past years to include) — confirm in ku.
    historical_data = ku.get_historical_production(kornmo, data.year.unique(),
                                                   4)
    data = data.merge(historical_data, on=['orgnr', 'year'])

    # Drop every row that has a missing value in any column.
    data.dropna(inplace=True)

    # Target: ratio of 'levert' to 'areal' (presumably yield per unit of
    # area — confirm units). 'levert' is removed once the target exists.
    data['y'] = data['levert'] / data['areal']
    data.drop('levert', axis=1, inplace=True)

    # The target is normalized with the explicit bounds 0 and 1000; the other
    # columns are normalized without bounds (presumably ku.normalize then
    # falls back to the column's own range — TODO confirm).
    data['y'] = ku.normalize(data['y'], 0, 1000)
    data['areal'] = ku.normalize(data['areal'])
    data['fulldyrket'] = ku.normalize(data['fulldyrket'])
    data['overflatedyrket'] = ku.normalize(data['overflatedyrket'])
    data['tilskudd_dyr'] = ku.normalize(data['tilskudd_dyr'])
    data['growth_start_day'] = ku.normalize(data['growth_start_day'])
    data['lat'] = ku.normalize(data['lat'])
    data['elevation'] = ku.normalize(data['elevation'])

    # Identifier columns plus the target; the name suggests they are excluded
    # from the model inputs, but that happens outside the visible fragment.
    y_column = ['y']
    remove_from_training = [
        'orgnr', 'kommunenr', 'gaardsnummer', 'bruksnummer', 'festenummer',
        'year'
    ] + y_column

    # Shuffle the rows and split them with test_size=0.2 into train and val.
    train, val = train_test_split(shuffle(data), test_size=0.2)
def normalize_temperature_actual(nn_actual: DataFrame) -> DataFrame:
    """Normalize the ``station_x_*`` temperature columns of ``nn_actual`` in place.

    Each of ``station_x_min``, ``station_x_mean`` and ``station_x_max`` is
    passed through ``ku.normalize`` with the module-level bounds
    ``temperature_lower`` and ``temperature_upper``.

    Args:
        nn_actual: Frame holding the three columns; it is modified.

    Returns:
        The same ``nn_actual`` object, with the columns overwritten.
    """
    for column in ('station_x_min', 'station_x_mean', 'station_x_max'):
        nn_actual[column] = ku.normalize(
            nn_actual[column], temperature_lower, temperature_upper)
    return nn_actual
def normalize_precipitation_actual(nn_actual: DataFrame) -> DataFrame:
    """Normalize the ``station_x_actual`` column of ``nn_actual`` in place.

    The column is passed through ``ku.normalize`` with the lower bound 0 and
    the module-level upper bound ``precipitation_upper``.

    Args:
        nn_actual: Frame holding the column; it is modified.

    Returns:
        The same ``nn_actual`` object, with the column overwritten.
    """
    column = 'station_x_actual'
    scaled = ku.normalize(nn_actual[column], 0, precipitation_upper)
    nn_actual[column] = scaled
    return nn_actual
def normalize_precipitation_inputs(nn_input: DataFrame) -> DataFrame:
    """Normalize the precipitation input columns of ``nn_input`` in place.

    For each prefix ``i`` in 0, 1, 2 the column ``{i}_masl_diff`` is passed
    through ``ku.normalize`` with the bounds -1000 and 1000, and the column
    ``{i}_value`` with the lower bound 0 and the module-level upper bound
    ``precipitation_upper``.

    Args:
        nn_input: Frame holding the columns listed above; it is modified.

    Returns:
        The same ``nn_input`` object, with the columns overwritten.
    """
    for i in (0, 1, 2):
        masl_column = f'{i}_masl_diff'
        value_column = f'{i}_value'
        nn_input[masl_column] = ku.normalize(nn_input[masl_column], -1000, 1000)
        nn_input[value_column] = ku.normalize(
            nn_input[value_column], 0, precipitation_upper)
    return nn_input