def normalize_temperature_inputs(nn_input: DataFrame) -> DataFrame:
    """Normalize the temperature input columns of *nn_input* in place.

    For each of the column prefixes ``0``, ``1`` and ``2`` the
    ``<prefix>_masl_diff`` column is passed through ``ku.normalize`` with the
    bounds -1000 and 1000, and the ``<prefix>_min``, ``<prefix>_mean`` and
    ``<prefix>_max`` columns with the module-level ``temperature_lower`` and
    ``temperature_upper`` bounds.

    Args:
        nn_input: Frame holding the columns named above; it is modified
            in place.

    Returns:
        The same ``nn_input`` object, for call chaining.
    """
    # NOTE(review): the prefixes presumably identify three neighbouring
    # weather stations and "masl" metres above sea level -- confirm.
    for station in range(3):
        masl_col = f'{station}_masl_diff'
        nn_input[masl_col] = ku.normalize(nn_input[masl_col], -1000, 1000)

        for stat in ('min', 'mean', 'max'):
            temp_col = f'{station}_{stat}'
            nn_input[temp_col] = ku.normalize(
                nn_input[temp_col], temperature_lower, temperature_upper)

    return nn_input
def get_as_aggregated(self, grouping_duration=7, years: List[int] = None, normalize_data=True):
    """Return the weather data aggregated over fixed-size groups of day columns.

    The per-day columns of ``self.weather_data`` are selected by regex, split
    into consecutive groups of ``grouping_duration`` columns, and each group
    is reduced row-wise: minimum of the daily minimum temperatures, maximum of
    the daily maximums, mean of the daily means, and sum of the daily
    precipitation. The resulting columns are named ``min_temp<k>``,
    ``max_temp<k>``, ``mean_temp<k>`` and ``total_rain<k>``, where ``<k>`` is
    the zero-based group index.

    Args:
        grouping_duration: Number of consecutive day columns per group. A
            value of 1 skips aggregation and returns the daily columns,
            relabelled ``0..n-1`` before the prefix is added.
        years: Years expected in ``self.weather_data``. When the loaded years
            differ from this list, the data is (re)loaded through
            ``self.__load_from_files(years)``.
        normalize_data: When true, the growth-start frame and every aggregate
            are passed through ``normalize`` (temperatures with bounds -30 and
            30, precipitation with bounds 0 and 10) before being combined.

    Returns:
        A DataFrame made of the ``year`` and ``orgnr`` columns, the result of
        ``self.find_growth_start()``, and the aggregated columns, concatenated
        column-wise.
    """
    # NOTE(review): with the default ``years=None`` the comparison below is
    # always unequal, so the data is reloaded on every call -- confirm that
    # this is intended.
    if self.weather_data is None or list(self.weather_data.year.unique()) != years:
        self.__load_from_files(years)

    def group_columns(columns, n):
        # Consecutive slices of at most ``n`` column labels each.
        return [columns[x:x + n] for x in range(0, len(columns), n)]

    def aggregate_cols(dataframe, col_regex, func):
        col_subset = dataframe.filter(regex=col_regex)

        # A grouping_duration of 1 day means there is nothing to aggregate:
        # hand back the daily columns, relabelled by position.
        if grouping_duration == 1:
            col_subset.columns = range(0, len(col_subset.columns))
            return col_subset

        grouped_cols = group_columns(col_subset.columns, grouping_duration)
        aggregated_cols = [func(dataframe[x]) for x in grouped_cols]
        return pd.concat(aggregated_cols, axis=1)

    min_temp = aggregate_cols(
        self.weather_data, 'temperature_day_[0-9]+_min',
        lambda df: df.min(axis=1)).add_prefix("min_temp")
    max_temp = aggregate_cols(
        self.weather_data, 'temperature_day_[0-9]+_max',
        lambda df: df.max(axis=1)).add_prefix("max_temp")
    mean_temp = aggregate_cols(
        self.weather_data, 'temperature_day_[0-9]+_mean',
        lambda df: df.mean(axis=1)).add_prefix("mean_temp")
    sum_rain = aggregate_cols(
        self.weather_data, 'precipitation_day_[0-9]+',
        lambda df: df.sum(axis=1)).add_prefix("total_rain")

    # Same evaluation order as before: identifier columns first, then the
    # growth-start lookup, then the optional normalisation of each part.
    key_cols = self.weather_data[['year', 'orgnr']]
    growth_start = self.find_growth_start()

    if normalize_data:
        growth_start = normalize(growth_start)
        min_temp = normalize(min_temp, -30, 30)
        max_temp = normalize(max_temp, -30, 30)
        mean_temp = normalize(mean_temp, -30, 30)
        sum_rain = normalize(sum_rain, 0, 10)

    return pd.concat(
        [key_cols, growth_start, min_temp, max_temp, mean_temp, sum_rain],
        axis=1)
# Join the per-farm weather aggregates (grouping duration 1), the farmer
# elevation data and the historical production figures onto ``data``.
weather_data = frost.get_as_aggregated(1)
data = data.merge(weather_data, on=['year', 'orgnr'])

elevation_data = get_farmer_elevation()
data = data.merge(elevation_data, on=['orgnr'])

historical_data = ku.get_historical_production(kornmo, data['year'].unique(), 4)
data = data.merge(historical_data, on=['orgnr', 'year'])

# Rows with any missing value are discarded before the target is derived.
data.dropna(inplace=True)

# Target: 'levert' divided by 'areal', normalised with the bounds 0 and 1000.
# The raw 'levert' column is removed once the target has been computed.
data['y'] = data['levert'] / data['areal']
data.drop(columns=['levert'], inplace=True)
data['y'] = ku.normalize(data['y'], 0, 1000)

# Remaining features are normalised with ku.normalize's default bounds.
for _feature in (
    'areal',
    'fulldyrket',
    'overflatedyrket',
    'tilskudd_dyr',
    'growth_start_day',
    'lat',
    'elevation',
):
    data[_feature] = ku.normalize(data[_feature])

y_column = ['y']
# Identifier columns plus the target column.
remove_from_training = [
    'orgnr',
    'kommunenr',
    'gaardsnummer',
    'bruksnummer',
    'festenummer',
    'year',
] + y_column

# 80/20 train/validation split of the shuffled rows.
train, val = train_test_split(shuffle(data), test_size=0.2)
def normalize_temperature_actual(nn_actual: DataFrame) -> DataFrame:
    """Normalize the ``station_x_*`` temperature columns of *nn_actual* in place.

    The ``station_x_min``, ``station_x_mean`` and ``station_x_max`` columns
    are each passed through ``ku.normalize`` with the module-level
    ``temperature_lower`` and ``temperature_upper`` bounds.

    Args:
        nn_actual: Frame holding the three columns named above; it is
            modified in place.

    Returns:
        The same ``nn_actual`` object, for call chaining.
    """
    for stat in ('min', 'mean', 'max'):
        column = f'station_x_{stat}'
        nn_actual[column] = ku.normalize(
            nn_actual[column], temperature_lower, temperature_upper)
    return nn_actual
def normalize_precipitation_actual(nn_actual: DataFrame) -> DataFrame:
    """Normalize the ``station_x_actual`` column of *nn_actual* in place.

    The column is passed through ``ku.normalize`` with a lower bound of 0 and
    the module-level ``precipitation_upper`` as upper bound.

    Args:
        nn_actual: Frame holding a ``station_x_actual`` column; it is
            modified in place.

    Returns:
        The same ``nn_actual`` object, for call chaining.
    """
    column = 'station_x_actual'
    scaled = ku.normalize(nn_actual[column], 0, precipitation_upper)
    nn_actual[column] = scaled
    return nn_actual
def normalize_precipitation_inputs(nn_input: DataFrame) -> DataFrame:
    """Normalize the precipitation input columns of *nn_input* in place.

    For each of the column prefixes ``0``, ``1`` and ``2`` the
    ``<prefix>_masl_diff`` column is passed through ``ku.normalize`` with the
    bounds -1000 and 1000, and the ``<prefix>_value`` column with a lower
    bound of 0 and the module-level ``precipitation_upper`` as upper bound.

    Args:
        nn_input: Frame holding the columns named above; it is modified
            in place.

    Returns:
        The same ``nn_input`` object, for call chaining.
    """
    # NOTE(review): the prefixes presumably identify three neighbouring
    # weather stations -- confirm.
    for station in range(3):
        masl_col = f'{station}_masl_diff'
        value_col = f'{station}_value'
        nn_input[masl_col] = ku.normalize(nn_input[masl_col], -1000, 1000)
        nn_input[value_col] = ku.normalize(
            nn_input[value_col], 0, precipitation_upper)
    return nn_input