Ejemplo n.º 1
0
def aggregate(self, freq: str = None, spatial: bool = False):
    """
    Aggregate observations into time bins, optionally across stations.

    Parameters
    ----------
    freq : str, optional
        Frequency passed to ``pd.Grouper`` for the ``time`` index level.
        When omitted, the instance's own ``_freq`` is used.
    spatial : bool
        When true, the per-station aggregates are additionally averaged
        over all stations, leaving one row per time bin.

    Returns
    -------
    A shallow copy of this instance whose ``_data`` holds the aggregated
    values rounded to one decimal. If there are no rows, or every value
    is null, a warning is issued and the instance itself is returned
    unchanged.
    """

    # Nothing to aggregate: no rows at all, or only null values
    if not (self.count() > 0) or self._data.isnull().values.all():
        warn('Skipping aggregation on empty DataFrame')
        return self

    # Fall back to the instance frequency when none was requested
    target_freq = self._freq if freq is None else freq

    # Work on a copy so this instance's `_data` is not reassigned
    result = copy(self)

    # Step 1: aggregate every station separately within each time bin,
    # using the per-column functions declared in `aggregations`
    time_bins = pd.Grouper(level='time', freq=target_freq)
    frame = result._data.groupby(['station', time_bins]).agg(
        result.aggregations)

    # Step 2 (optional): collapse the station level by averaging
    if spatial:
        frame = frame.groupby(
            [pd.Grouper(level='time', freq=target_freq)]).mean()

    # Round to one decimal and hand back the copy
    result._data = frame.round(1)
    return result
Ejemplo n.º 2
0
def normalize(self):
    """
    Normalize the DataFrame

    Ensures that every station has one row for each time step between
    ``_start`` and ``_end`` (at frequency ``_freq``). Missing rows are
    added with NaN in all columns from ``_first_met_col`` onwards.

    The gap filling only runs when both ``_start`` and ``_end`` are set
    and ``coverage()`` is below 1; otherwise the copy is returned as is.
    A warning is issued when the instance holds no rows, but processing
    continues.

    Returns
    -------
    A shallow copy of this instance with the normalized ``_data``.
    """

    if self.count() == 0:
        warn('Pointless normalization of empty DataFrame')

    # Create temporal instance
    temp = copy(self)

    if temp._start and temp._end and temp.coverage() < 1:

        # Columns which receive NaN placeholders
        columns = temp._columns[temp._first_met_col:]

        # Handle tz-aware date ranges (the attribute may not exist at all)
        timezone_name = getattr(temp, '_timezone', None)
        if timezone_name is not None:
            timezone = pytz.timezone(timezone_name)
            start = temp._start.astimezone(timezone)
            end = temp._end.astimezone(timezone)
        else:
            start = temp._start
            end = temp._end

        # The expected time steps are the same for every station, so the
        # range is built once instead of once per station
        times = pd.date_range(
            start,
            end,
            freq=temp._freq,
            tz=timezone_name)

        # Full (station, time) grid filled with float NaN. Building it in
        # one step avoids concatenating inside a loop and avoids seeding
        # the concat with an empty object-dtype frame, so the placeholder
        # columns are always float. An empty station list simply yields
        # an empty grid.
        grid = pd.MultiIndex.from_product(
            [temp._stations, times], names=['station', 'time'])
        result = pd.DataFrame(NaN, index=grid, columns=columns)

        # Merge data: existing rows come first, so `first()` keeps their
        # values and only falls back to the NaN placeholders for gaps
        temp._data = pd.concat([temp._data, result],
                               axis=0).groupby(['station', 'time'],
                                               as_index=True).first()

        # None -> NaN
        temp._data = temp._data.fillna(NaN)

    # Return class instance
    return temp
Ejemplo n.º 3
0
    def normalize(self):
        """
        Normalize the DataFrame

        Ensures that every (station, period) combination has one row for
        each of the twelve months. Missing rows are added with NaN in all
        columns from ``_first_met_col`` onwards.

        The periods of a station are the distinct ``end`` index values
        found in its rows. When the instance holds no rows at all, a
        warning is issued and, if ``_end`` is set, that single period is
        used for every station instead.

        Returns
        -------
        A shallow copy of this instance with the normalized ``_data``.
        """

        # Create temporal instance
        temp = copy(self)

        row_count = self.count()

        if row_count == 0:
            warn('Pointless normalization of empty DataFrame')

        # Columns which receive NaN placeholders
        columns = temp._columns[temp._first_met_col:]
        index_names = ['station', 'start', 'end', 'month']

        # Placeholder frames, one per (station, period)
        placeholders = []

        # Go through list of weather stations
        for station in temp._stations:
            # Get periods
            if row_count > 0:
                periods = temp._data[temp._data.index.get_level_values(
                    'station') == station].index.unique('end')
            elif self._end:
                periods = pd.Index([self._end])
            else:
                periods = pd.Index([])
            # Go through all periods
            for period in periods:
                # One row per month; `start` is derived as `end - 29`
                # NOTE(review): presumably `end` is a year, making this a
                # 30-year span -- confirm against the data source
                index = pd.MultiIndex.from_product(
                    [[station], [period - 29], [period], range(1, 13)],
                    names=index_names)
                # Float NaN keeps the placeholder columns numeric
                placeholders.append(
                    pd.DataFrame(np.nan, index=index, columns=columns))

        # Merge data once: existing rows come first, so `first()` keeps
        # their values and only falls back to the placeholders for gaps.
        # Without placeholders the data is left untouched.
        if placeholders:
            temp._data = pd.concat(
                [temp._data, *placeholders], axis=0).groupby(
                    index_names, as_index=True).first()

        # None -> NaN (`np.NaN` no longer exists in NumPy 2.0)
        temp._data = temp._data.fillna(np.nan)

        # Return class instance
        return temp
Ejemplo n.º 4
0
def interpolate(self, limit: int = 3):
    """
    Interpolate NULL values

    Gaps are filled by linear interpolation, separately for each station
    so that values never leak from one station into another.

    Parameters
    ----------
    limit : int
        Maximum number of consecutive null values to fill. Filling is
        done in both directions (``limit_direction='both'``).

    Returns
    -------
    A shallow copy of this instance with the interpolated ``_data``. If
    there are no rows, or every value is null, a warning is issued and
    the instance itself is returned unchanged.
    """

    if self.count() > 0 and not self._data.isnull().values.all():

        # Create temporal instance
        temp = copy(self)

        # Apply interpolation per station.
        # `group_keys=False` keeps the original index: the applied function
        # returns a frame indexed like its input, and without this flag
        # pandas >= 2.0 would prepend the group key as an extra (duplicate)
        # `station` index level.
        temp._data = temp._data.groupby('station', group_keys=False).apply(
            lambda group: group.interpolate(
                method='linear', limit=limit, limit_direction='both', axis=0))

        # Return class instance
        return temp

    # Show warning & return self
    warn('Skipping interpolation on empty DataFrame')
    return self
Ejemplo n.º 5
0
def load_handler(
    endpoint: str,
    path: str,
    columns: list,
    types: dict,
    parse_dates: list,
    coerce_dates: bool = False
) -> pd.DataFrame:
    """
    Load a single CSV file into a DataFrame

    Parameters
    ----------
    endpoint : str
        Base location; ``endpoint + path`` is handed to ``pd.read_csv``.
    path : str
        Location of the gzip-compressed CSV file relative to ``endpoint``.
    columns : list
        Column names assigned to the CSV columns (the file has no header).
    types : dict
        Mapping of column name to dtype, passed to ``pd.read_csv``.
    parse_dates : list
        Columns which ``pd.read_csv`` should parse as dates. When
        ``coerce_dates`` is set, the entries are used as column positions.
    coerce_dates : bool
        When true, the ``parse_dates`` columns are converted again with
        ``pd.to_datetime(errors='coerce')`` so that unparseable values
        become ``NaT`` and the columns end up with a datetime dtype.

    Returns
    -------
    The loaded DataFrame. If the file cannot be found or the request
    fails with an ``HTTPError``, a warning is issued and an empty
    DataFrame with the keys of ``types`` as columns is returned.
    """

    try:

        # Read CSV file from Meteostat endpoint
        df = pd.read_csv(
            endpoint + path,
            compression='gzip',
            names=columns,
            dtype=types,
            parse_dates=parse_dates)

    except (FileNotFoundError, HTTPError):

        # Create empty DataFrame
        df = pd.DataFrame(columns=[*types])

        # Display warning
        warn(f'Cannot load {path} from {endpoint}')

    else:

        # Force datetime conversion.
        # Each column is replaced by label instead of being written through
        # `iloc`: positional assignment sets values in place on recent
        # pandas versions and can leave the column with its old
        # (object) dtype.
        if coerce_dates:
            for label in df.columns[parse_dates]:
                df[label] = pd.to_datetime(df[label], errors='coerce')

    # Return DataFrame
    return df