Exemple #1
0
    def score(self, targets, **kwargs):
        '''Score this model against some target values.

        Metrics are computed twice: once over every timestamp that has both
        a target and a prediction (metric names suffixed ``_day_night``), and
        once restricted to the timestamps that ``apollo.is_daylight`` reports
        as daylight at ``self.center`` (suffixed ``_day_only``).

        Arguments:
            targets (pandas.DataFrame):
                The targets to compare against. Rows containing NaN or
                infinite values are dropped before scoring.
            **kwargs:
                Additional arguments are forwarded to :meth:`predict`.

        Returns:
            pandas.DataFrame:
                A table of metrics whose index is named ``metric``.
        '''
        # Imported locally so this method does not depend on how (or whether)
        # the enclosing module aliases pandas.
        import pandas as pd

        # Infinities are treated like missing values and dropped.
        targets = targets.replace([np.inf, -np.inf], np.nan).dropna()
        predictions = self.predict(targets.index, **kwargs)

        # The model may not produce a prediction for every requested time.
        # Score only the timestamps that were actually predicted.
        n_missing = len(targets.index) - len(predictions.index)
        if n_missing != 0:
            logger.warning(f'missing {n_missing} predictions')
            targets = targets.reindex(predictions.index)

        daynight_scores = apollo.metrics.all(targets, predictions)
        daynight_scores.index = daynight_scores.index + '_day_night'

        # Repeat the evaluation using only the daylight rows.
        lat, lon = self.center
        is_daylight = apollo.is_daylight(targets.index, lat, lon)
        predictions = predictions[is_daylight]
        targets = targets[is_daylight]
        dayonly_scores = apollo.metrics.all(targets, predictions)
        dayonly_scores.index = dayonly_scores.index + '_day_only'

        # ``.append`` was removed from pandas objects in pandas 2.0;
        # ``pd.concat`` stacks the two tables the same way.
        scores = pd.concat([daynight_scores, dayonly_scores])
        scores.index.name = 'metric'
        return scores
Exemple #2
0
    def postprocess(self, raw_predictions, index):
        '''Turn raw model output into a labelled table of predictions.

        The parent class builds the initial table; this method then renames
        its columns to ``self.columns``, undoes target scaling when
        ``self.standardize`` is set, and zeroes the night-time rows when
        ``self.daylight_only`` is set.

        Arguments:
            raw_predictions:
                The raw output handed to the parent ``postprocess``.
            index:
                The timestamps of the predictions. It is converted with
                ``apollo.DatetimeIndex`` and named ``time``.

        Returns:
            The table produced by the parent ``postprocess`` (presumably a
            ``pandas.DataFrame``), updated as described above.
        '''
        # Let the parent class rebuild the table on a proper time index.
        logger.debug('postprocess: constructing data frame')
        index = apollo.DatetimeIndex(index, name='time')
        frame = super().postprocess(raw_predictions, index)

        # Label the columns with the recorded target names.
        column_names = list(self.columns)
        assert len(frame.columns) == len(column_names)
        frame.columns = pd.Index(column_names)

        # Map the predictions back to the original target scale.
        if self.standardize:
            logger.debug('postprocess: unscaling predictions')
            unscaled = self.target_scaler.inverse_transform(frame)
            frame[column_names] = unscaled

        # Optionally force every night-time prediction to zero.
        if self.daylight_only:
            logger.debug('postprocess: setting night time to zero')
            lat, lon = self.center
            daylight = apollo.is_daylight(index, lat, lon)
            frame.loc[~daylight, :] = 0

        return frame
Exemple #3
0
    def postprocess(self, times, raw_predictions):
        '''Turn raw model output into a labelled data frame of predictions.

        Arguments:
            times:
                The timestamps of the predictions. They are converted with
                ``apollo.DatetimeIndex`` and become the index, named ``time``.
            raw_predictions:
                The raw model output, with one column per entry of
                ``self.cols``.

        Returns:
            pandas.DataFrame:
                The predictions, indexed by time with columns ``self.cols``.
                Values are unscaled when ``self.standardize`` is set, and
                night-time rows are zeroed when ``self.daylight_only`` is set.
        '''
        # Reconstruct the data frame.
        logger.debug('postprocess: constructing data frame')
        cols = self.cols
        index = apollo.DatetimeIndex(times, name='time')
        predictions = pd.DataFrame(raw_predictions, index=index, columns=cols)

        # Unscale the predictions.
        if self.standardize:
            logger.debug('postprocess: unscaling predictions')
            predictions[cols] = self.target_scaler.inverse_transform(raw_predictions)

        # Set overnight predictions to zero (optionally).
        if self.daylight_only:
            logger.debug('postprocess: setting night time to zero')
            (lat, lon) = self.center
            # The daylight mask has one entry per timestamp, so it must be
            # inverted element-wise with ``~``. Python's ``not`` would try to
            # collapse the whole mask into a single truth value.
            night = ~apollo.is_daylight(index, lat, lon)
            predictions.loc[night, :] = 0

        return predictions
Exemple #4
0
    def preprocess(self, data, targets=None, fit=False):
        '''Process feature data (and optional targets) into numpy arrays.

        Rows containing NaN or infinite values are dropped. Targets are
        averaged into 1-hour bins, optionally restricted to daylight hours,
        and aligned with the features on their common index. Features and
        targets are standardized when ``self.standardize`` is set, and
        time-of-day / time-of-year features are appended on request.

        Arguments:
            data (pandas.DataFrame):
                The feature data, indexed by time.
            targets (pandas.DataFrame or None):
                The target data, indexed by time. Required when ``fit`` is
                true; otherwise optional.
            fit (bool):
                If true, record the target column names and fit the scalers
                before transforming.

        Returns:
            tuple:
                A pair ``(features, targets)``. ``features`` is a numpy
                array. ``targets`` is a numpy array, or ``None`` when no
                targets were given.

        Raises:
            ValueError:
                If ``fit`` is true but no targets were given.
        '''
        # Fitting needs the targets both for their column names and for the
        # target scaler, so fail early with a clear message.
        if fit and targets is None:
            raise ValueError('targets are required when fit=True')

        has_targets = targets is not None

        # If we're fitting, we record the column names.
        # Otherwise we ensure the targets have the expected columns.
        if fit:
            logger.debug('preprocess: recording columns')
            self.columns = list(targets.columns)
        elif has_targets:
            logger.debug('preprocess: checking columns')
            assert set(targets.columns) == set(self.columns)

        # Drop NaNs and infinities.
        logger.debug('preprocess: dropping NaNs and infinities')
        data = data.replace([np.inf, -np.inf], np.nan).dropna()
        if has_targets:
            targets = targets.replace([np.inf, -np.inf], np.nan).dropna()

        # We only support 1-hour frequencies.
        # For overlapping targets, take the mean.
        if has_targets:
            logger.debug('preprocess: aggregating targets')
            targets = targets.groupby(targets.index.floor('1h')).mean()

        # Ignore targets at night (optionally).
        if has_targets and self.daylight_only:
            logger.debug('preprocess: dropping night time targets')
            times = targets.index
            (lat, lon) = self.center
            targets = targets[apollo.is_daylight(times, lat, lon)]

        # The indices for the data and targets may not match.
        # We can only consider their intersection.
        if has_targets:
            logger.debug('preprocess: joining features and targets')
            index = data.index.intersection(targets.index)
            data = data.loc[index]
            targets = targets.loc[index]
        else:
            index = data.index

        # Scale the feature data (optionally).
        if self.standardize:
            logger.debug('preprocess: scaling features')
            cols = list(data.columns)
            raw_data = data[cols].to_numpy()
            if fit:
                self.feature_scaler.fit(raw_data)
            data[cols] = self.feature_scaler.transform(raw_data)

        # Scale the target data (optionally).
        if self.standardize and has_targets:
            logger.debug('preprocess: scaling targets')
            cols = self.columns
            raw_targets = targets[cols].to_numpy()
            if fit:
                self.target_scaler.fit(raw_targets)
            targets[cols] = self.target_scaler.transform(raw_targets)

        # Compute additional features (optionally).
        if self.add_time_of_day:
            logger.debug('preprocess: computing time-of-day')
            data = data.join(apollo.time_of_day(index))
        if self.add_time_of_year:
            logger.debug('preprocess: computing time-of-year')
            data = data.join(apollo.time_of_year(index))

        # We always return both, even if targets was not given; in that case
        # the second element is None instead of an array.
        # We must return numpy arrays.
        logger.debug('preprocess: casting to numpy')
        target_array = targets.to_numpy() if has_targets else None
        return data.to_numpy(), target_array