def aggregate(self, freq: str = None, spatial: bool = False):
    """
    Aggregate observations over time and, optionally, across stations.

    :param freq: Target frequency passed to ``pd.Grouper``; when ``None``
        the instance's own ``_freq`` is used.
    :param spatial: When true, the per-station aggregates are additionally
        averaged over all stations for each time bucket.
    :return: A copy of the instance holding the aggregated data rounded to
        one decimal, or the instance itself (after a warning) when there is
        nothing to aggregate.
    """

    # Nothing to do when there are no rows or every value is null
    if not self.count() > 0 or self._data.isnull().values.all():
        warn('Skipping aggregation on empty DataFrame')
        return self

    # Work on a copy so the caller's instance keeps its data
    result = copy(self)

    # Fall back to the instance frequency
    target_freq = self._freq if freq is None else freq

    # Temporal step: one bucket per station and time interval
    station_buckets = result._data.groupby(
        ['station', pd.Grouper(level='time', freq=target_freq)])
    result._data = station_buckets.agg(result.aggregations)

    # Spatial step: collapse the station dimension by averaging
    if spatial:
        time_buckets = result._data.groupby(
            [pd.Grouper(level='time', freq=target_freq)])
        result._data = time_buckets.mean()

    result._data = result._data.round(1)

    return result
def normalize(self):
    """
    Normalize the DataFrame so every station has a row for every time step.

    When both ``_start`` and ``_end`` are set and ``coverage()`` is below 1,
    an all-NaN scaffold covering each station in ``_stations`` and each
    timestamp between start and end (at ``_freq``) is merged with the
    existing data; existing values win, missing rows stay NaN.

    A warning is emitted when the instance holds no rows, but processing
    continues.

    :return: A copy of the instance; its ``_data`` is replaced only when the
        scaffold was applied.
    """

    if self.count() == 0:
        warn('Pointless normalization of empty DataFrame')

    # Work on a copy so the caller's instance keeps its data
    temp = copy(self)

    if temp._start and temp._end and temp.coverage() < 1:
        # Columns that receive NaN placeholders
        met_columns = temp._columns[temp._first_met_col:]

        # Not every instance carries a time zone attribute
        tz_name = getattr(temp, '_timezone', None)

        # Handle tz-aware date ranges
        if tz_name is not None:
            timezone = pytz.timezone(tz_name)
            start = temp._start.astimezone(timezone)
            end = temp._end.astimezone(timezone)
        else:
            start = temp._start
            end = temp._end

        # The time axis is the same for all stations, so build it once
        timeline = pd.date_range(start, end, freq=self._freq, tz=tz_name)

        # Build the full station x time scaffold in one step instead of
        # growing a frame by repeated concatenation inside a loop
        scaffold_index = pd.MultiIndex.from_product(
            [temp._stations, timeline], names=['station', 'time'])
        scaffold = pd.DataFrame(
            float('nan'), index=scaffold_index, columns=met_columns)

        # Merge data: the first non-null value per (station, time) wins,
        # so existing observations take precedence over the scaffold
        temp._data = pd.concat([temp._data, scaffold], axis=0).groupby(
            ['station', 'time'], as_index=True).first()

        # None -> NaN
        temp._data = temp._data.fillna(float('nan'))

    # Return class instance
    return temp
def normalize(self):
    """
    Normalize the DataFrame so every station/period has all twelve months.

    For each station in ``_stations`` the reference periods are taken from
    the distinct ``end`` index values of that station's rows; when the
    instance holds no rows, ``_end`` (if set) is used as the only period.
    For every period an all-NaN frame with months 1-12 is merged with the
    existing data; existing values win, missing months stay NaN.

    A warning is emitted when the instance holds no rows, but processing
    continues.

    :return: A copy of the instance carrying the normalized ``_data``.
    """

    # Create temporal instance
    temp = copy(self)

    # The row count does not change while scaffolds are collected
    row_count = self.count()

    if row_count == 0:
        warn('Pointless normalization of empty DataFrame')

    index_columns = ['station', 'start', 'end', 'month']
    met_columns = temp._columns[temp._first_met_col:]
    station_level = temp._data.index.get_level_values('station') \
        if row_count > 0 else None

    # All-NaN frames, merged with the data in one pass after the loop
    scaffolds = []

    # Go through list of weather stations
    for station in temp._stations:
        # The list of periods
        periods: pd.Index = pd.Index([])

        # Get periods
        if row_count > 0:
            periods = temp._data[station_level == station].index.unique('end')
        elif self._end:
            periods = pd.Index([self._end])

        # Go through all periods
        for period in periods:
            # Create DataFrame
            df = pd.DataFrame(columns=met_columns)
            # Populate index columns
            df['month'] = range(1, 13)
            df['station'] = station
            # NOTE(review): start is 29 below end, presumably 30-year
            # reference periods with integer years -- confirm
            df['start'] = period - 29
            df['end'] = period
            scaffolds.append(df.set_index(index_columns))

    # Merge data: the first non-null value per index key wins, so existing
    # values take precedence over the NaN scaffolds
    if scaffolds:
        temp._data = pd.concat([temp._data, *scaffolds], axis=0).groupby(
            index_columns, as_index=True).first()

    # None -> NaN (np.NaN was removed in NumPy 2.0; np.nan works everywhere)
    temp._data = temp._data.fillna(np.nan)

    # Return class instance
    return temp
def interpolate(self, limit: int = 3):
    """
    Interpolate NULL values station by station.

    Gaps are filled linearly in both directions, each station being
    handled separately so values never bleed from one station into another.

    :param limit: Maximum number of consecutive NULL values to fill,
        forwarded to ``DataFrame.interpolate``.
    :return: A copy of the instance with interpolated data, or the instance
        itself (after a warning) when there is nothing to interpolate.
    """

    # Nothing to do when there are no rows or every value is null
    if not self.count() > 0 or self._data.isnull().values.all():
        warn('Skipping interpolation on empty DataFrame')
        return self

    # Create temporal instance
    temp = copy(self)

    # group_keys=False keeps the original index; without it pandas >= 2.0
    # prepends the group key and yields a duplicated 'station' level
    temp._data = temp._data.groupby('station', group_keys=False).apply(
        lambda group: group.interpolate(
            method='linear',
            limit=limit,
            limit_direction='both',
            axis=0))

    # Return class instance
    return temp
def load_handler(
    endpoint: str,
    path: str,
    columns: list,
    types: dict,
    parse_dates: list,
    coerce_dates: bool = False
) -> pd.DataFrame:
    """
    Load a single gzip-compressed CSV file into a DataFrame.

    :param endpoint: Base location; ``path`` is appended to it verbatim.
    :param path: Location of the file relative to ``endpoint``.
    :param columns: Column names applied to the CSV data.
    :param types: Mapping of column name to dtype; its keys also define the
        columns of the empty fallback frame.
    :param parse_dates: Columns to parse as dates, forwarded to
        ``pd.read_csv`` and used positionally when ``coerce_dates`` is set.
    :param coerce_dates: When true, the date columns are converted again
        with ``pd.to_datetime(errors='coerce')``.
    :return: The loaded data, or an empty frame (after a warning) when the
        file is missing or the request fails with an HTTP error.
    """

    source = endpoint + path

    try:
        # Fetch and parse the CSV file
        frame = pd.read_csv(
            source,
            compression='gzip',
            names=columns,
            dtype=types,
            parse_dates=parse_dates)

        # Force datetime conversion
        if coerce_dates:
            coerced = frame.iloc[:, parse_dates].apply(
                pd.to_datetime, errors='coerce')
            frame.iloc[:, parse_dates] = coerced

    except (FileNotFoundError, HTTPError):
        # Fall back to an empty DataFrame
        frame = pd.DataFrame(columns=list(types))

        # Display warning
        warn(f'Cannot load {path} from {endpoint}')

    return frame