Example #1
0
    def postprocess(self, raw_predictions, index):
        '''Turn raw model output into a labeled data frame of predictions.

        Arguments:
            raw_predictions:
                The raw model output. It is forwarded unchanged to the parent
                class's ``postprocess``.
            index:
                The times of the predictions. Converted with
                ``apollo.DatetimeIndex`` and named ``time``.

        Returns:
            The frame produced by the parent class's ``postprocess``, with its
            columns relabeled to ``self.columns``. Values are unscaled with
            ``self.target_scaler`` when ``self.standardize`` is set, and night
            time rows are zeroed when ``self.daylight_only`` is set.
        '''
        # Let the parent class rebuild the frame from the raw output.
        logger.debug('postprocess: constructing data frame')
        times = apollo.DatetimeIndex(index, name='time')
        frame = super().postprocess(raw_predictions, times)

        # Relabel the columns; the parent must yield one column per label.
        labels = list(self.columns)
        assert len(frame.columns) == len(labels)
        frame.columns = pd.Index(labels)

        # Undo the target scaling.
        if self.standardize:
            logger.debug('postprocess: unscaling predictions')
            unscaled = self.target_scaler.inverse_transform(frame)
            frame[labels] = unscaled

        # Nothing more to do unless night time must be blanked out.
        if not self.daylight_only:
            return frame

        logger.debug('postprocess: setting night time to zero')
        lat, lon = self.center
        daylight = apollo.is_daylight(times, lat, lon)
        frame.loc[~daylight, :] = 0
        return frame
Example #2
0
def times_to_reftimes(times):
    '''Find the reference times of forecasts that may contain the given times.

    Each time is floored to a 6 hour boundary. That boundary and the six
    preceding 6 hour boundaries (reaching back 36 hours) are all selected.

    On the edge case, this may select one extra forecast per time.

    Arguments:
        times (numpy.ndarray like):
            A series of forecast times.

    Returns:
        apollo.DatetimeIndex:
            The set of reftimes for forecasts containing the given times.
    '''
    unique_times = apollo.DatetimeIndex(times, name='reftime').unique()
    latest = unique_times.floor('6h').unique()

    # Fold each earlier 6 hour step into the result, nearest offset first.
    selected = latest
    for hours_back in range(6, 42, 6):
        selected = selected.union(latest - pd.Timedelta(f'{hours_back}h'))
    return selected
Example #3
0
    def postprocess(self, times, raw_predictions):
        '''Convert raw model output into a data frame of predictions.

        Arguments:
            times (array like):
                The time of each prediction row. Converted with
                ``apollo.DatetimeIndex`` and used as the index, named ``time``.
            raw_predictions (array like):
                The raw model output, with one row per time and one column
                per entry of ``self.cols``.

        Returns:
            pandas.DataFrame:
                The predictions indexed by time, with columns ``self.cols``.
                Values are unscaled with ``self.target_scaler`` when
                ``self.standardize`` is set, and night time rows are zeroed
                when ``self.daylight_only`` is set.
        '''
        # Reconstruct the data frame.
        logger.debug('postprocess: constructing data frame')
        cols = self.cols
        index = apollo.DatetimeIndex(times, name='time')
        predictions = pd.DataFrame(raw_predictions, index=index, columns=cols)

        # Unscale the predictions.
        if self.standardize:
            logger.debug('postprocess: unscaling predictions')
            predictions[cols] = self.target_scaler.inverse_transform(raw_predictions)

        # Set overnight predictions to zero (optionally).
        if self.daylight_only:
            logger.debug('postprocess: setting night time to zero')
            (lat, lon) = self.center
            # Negate element-wise with `~`. The `not` operator would try to
            # collapse the whole mask into a single bool, which either raises
            # or yields a scalar instead of a per-row mask.
            night = ~apollo.is_daylight(index, lat, lon)
            predictions.loc[night, :] = 0

        return predictions
Example #4
0
    def load_data(self, index, dedupe_strategy='best'):
        '''Load input data for the given times.

        The requested times are floored to the hour and deduplicated, the
        forecasts that may contain them are opened, and the result is
        reshaped into a frame with one row per time.

        Arguments:
            index (pandas.DatetimeIndex):
                The times to forecast.
            dedupe_strategy (str or int):
                The strategy for selecting between duplicate forecasts.
                With ``'best'``, each group of rows sharing an index value
                keeps only the rows whose ``forecast`` equals the group's
                minimum. With an integer ``n`` less than 6, each group keeps
                only the rows whose ``forecast`` lies in the half-open range
                ``[n * 6h, (n + 1) * 6h)``.

        Returns:
            pandas.DataFrame:
                A data frame indexed by the forecast time.

        Raises:
            ValueError:
                If ``dedupe_strategy`` is neither ``'best'`` nor an integer
                less than 6.
        '''
        # Normalize the requested times to unique, whole hours.
        index = apollo.DatetimeIndex(index)
        index = index.floor('1h').unique()

        # Load the xarray data.
        # Only the variables in `self.features` are kept, cast to float32.
        logger.debug('load: loading netcdf')
        reftimes = nam.times_to_reftimes(index)
        data = nam.open(reftimes, on_miss='skip')
        data = data[self.features]
        data = data.astype('float32')

        # Select geographic area.
        logger.debug('load: slicing geographic area')
        data = nam.slice_geo(data, center=self.center, shape=self.shape)

        # Create a data frame.
        # This will have a multi-index of `(reftime, forecast, x, y, *z)`,
        # where `*z` is all of the different z-axes in the dataset.
        logger.debug('load: converting to dataframe')
        data = data.to_dataframe().drop(['lat', 'lon'], axis=1)

        # Replace `reftime` and `forecast` levels with `time`.
        # `time` becomes the leading index level, `reftime` is discarded, and
        # `forecast` is kept as a regular column for the dedupe step below.
        logger.debug('load: reindex by time')
        old_index = data.index
        data = data.set_index('time', append=True)
        data = data.reorder_levels(['time', *old_index.names])
        data = data.reset_index('reftime', drop=True)
        data = data.reset_index('forecast', drop=False)

        # Filter to only the times requested.
        data = data.loc[index]

        # Handle duplicates.
        # The same index value can be covered by several forecasts; keep the
        # rows chosen by `dedupe_strategy`. `droplevel(0)` removes the index
        # level holding the group keys that `apply` adds.
        logger.debug(f'load: selecting forecast hour (dedupe_strategy={dedupe_strategy})')
        if dedupe_strategy == 'best':
            data = data.groupby(data.index) \
                .apply(lambda g: g[g.forecast == g.forecast.min()]) \
                .droplevel(0)
        elif isinstance(dedupe_strategy, int) and dedupe_strategy < 6:
            # Keep forecasts inside the 6 hour window numbered
            # `dedupe_strategy`, counting windows from zero.
            delta = pd.Timedelta('6h')
            lo = delta * dedupe_strategy
            hi = lo + delta
            data = data.groupby(data.index) \
                .apply(lambda g: g[(lo <= g.forecast) & (g.forecast < hi)]) \
                .droplevel(0)
        else:
            raise ValueError(f'invalid dedupe_strategy {repr(dedupe_strategy)}')

        # We no longer need the forecast hour.
        data = data.drop('forecast', axis=1)

        # Unstack until we're indexed by time alone, then flatten the columns.
        # The loop stops once two index levels remain; the unstack after it
        # moves the last non-time level into the columns.
        logger.debug('load: unstacking geographic indices')
        while 2 < len(data.index.levels):
            data = data.unstack()
        data = data.unstack()
        data.columns = data.columns.to_flat_index()

        # The index has lost its timezone information. Fix it.
        data.index = apollo.DatetimeIndex(data.index)

        # We're done.
        return data