def score(self, targets, **kwargs):
    '''Score this model against some target values.

    Metrics are computed twice: once over every timestamp that has a
    prediction, and once over daylight timestamps only. The two sets of
    metrics are distinguished by a ``_day_night`` or ``_day_only`` suffix
    on the metric name.

    Arguments:
        targets (pandas.DataFrame):
            The targets to compare against. Rows containing NaNs or
            infinities are ignored.
        **kwargs:
            Additional arguments are forwarded to :meth:`predict`.

    Returns:
        pandas.DataFrame:
            A table of metrics, indexed by metric name.
    '''
    # Imported locally so this method does not depend on how the
    # surrounding module spells its pandas import.
    import pandas as pd

    # Infinities are treated the same as missing values.
    targets = targets.replace([np.inf, -np.inf], np.nan).dropna()
    predictions = self.predict(targets.index, **kwargs)

    # The model may not produce a prediction for every requested time.
    # Score only the times that were actually predicted.
    n_missing = len(targets.index) - len(predictions.index)
    if n_missing != 0:
        logger.warning(f'missing {n_missing} predictions')
        targets = targets.reindex(predictions.index)

    # Metrics over all timestamps.
    daynight_scores = apollo.metrics.all(targets, predictions)
    daynight_scores.index = daynight_scores.index + '_day_night'

    # Metrics over daylight timestamps only.
    lat, lon = self.center
    is_daylight = apollo.is_daylight(targets.index, lat, lon)
    predictions = predictions[is_daylight]
    targets = targets[is_daylight]
    dayonly_scores = apollo.metrics.all(targets, predictions)
    dayonly_scores.index = dayonly_scores.index + '_day_only'

    # ``DataFrame.append`` was removed in pandas 2.0; ``concat`` stacks the
    # two tables row-wise on every pandas version.
    scores = pd.concat([daynight_scores, dayonly_scores])
    scores.index.name = 'metric'
    return scores
def postprocess(self, raw_predictions, index):
    '''Turn raw model output into a labeled data frame of predictions.

    The parent class builds the initial frame. This method then labels
    the columns with ``self.columns``, optionally undoes the target
    scaling, and optionally zeroes out night-time rows.

    Arguments:
        raw_predictions:
            The raw model output, passed through to the parent
            ``postprocess``.
        index:
            The timestamps of the predictions; converted with
            ``apollo.DatetimeIndex`` and named ``'time'``.

    Returns:
        The predictions frame with one column per entry in
        ``self.columns``.
    '''
    # Let the parent class rebuild the frame over a proper time index.
    logger.debug('postprocess: constructing data frame')
    time_index = apollo.DatetimeIndex(index, name='time')
    frame = super().postprocess(raw_predictions, time_index)

    # Label the columns; the parent must have produced one per target.
    names = list(self.columns)
    assert len(frame.columns) == len(names)
    frame.columns = pd.Index(names)

    # Map the predictions back to the original target scale.
    if self.standardize:
        logger.debug('postprocess: unscaling predictions')
        unscaled = self.target_scaler.inverse_transform(frame)
        frame[names] = unscaled

    # Force every night-time row to zero.
    if self.daylight_only:
        logger.debug('postprocess: setting night time to zero')
        latitude, longitude = self.center
        is_night = ~apollo.is_daylight(time_index, latitude, longitude)
        frame.loc[is_night, :] = 0

    return frame
def postprocess(self, times, raw_predictions):
    '''Turn raw model output into a labeled data frame of predictions.

    Arguments:
        times:
            The timestamps of the predictions; converted with
            ``apollo.DatetimeIndex`` and named ``'time'``.
        raw_predictions:
            The raw model output, with one column per entry in
            ``self.cols``.

    Returns:
        pandas.DataFrame:
            The predictions, indexed by time. Values are unscaled when
            ``self.standardize`` is set, and night-time rows are zero
            when ``self.daylight_only`` is set.
    '''
    # Reconstruct the data frame.
    logger.debug('postprocess: constructing data frame')
    cols = self.cols
    index = apollo.DatetimeIndex(times, name='time')
    predictions = pd.DataFrame(raw_predictions, index=index, columns=cols)

    # Unscale the predictions.
    if self.standardize:
        logger.debug('postprocess: unscaling predictions')
        predictions[cols] = self.target_scaler.inverse_transform(raw_predictions)

    # Set overnight predictions to zero (optionally).
    if self.daylight_only:
        logger.debug('postprocess: setting night time to zero')
        (lat, lon) = self.center
        # The daylight mask has one entry per timestamp, so it must be
        # inverted elementwise. Python's ``not`` would try to reduce the
        # whole mask to a single truth value.
        night = ~apollo.is_daylight(index, lat, lon)
        predictions.loc[night, :] = 0

    return predictions
def preprocess(self, data, targets=None, fit=False):
    '''Process feature data, and optionally targets, into numpy arrays.

    Rows containing NaNs or infinities are dropped. When targets are
    given, they are averaged into 1-hour bins, optionally restricted to
    daylight hours, and both frames are reduced to the rows whose index
    values appear in both. Features and targets are then optionally
    standardized, and time-of-day / time-of-year features are optionally
    appended to the feature data.

    Arguments:
        data (pandas.DataFrame):
            The feature data.
        targets (pandas.DataFrame or None):
            The target data. Its index must support ``floor('1h')``.
        fit (bool):
            If true, record the target column names on ``self.columns``
            and fit the scalers before transforming. Requires targets.

    Returns:
        tuple:
            A pair ``(features, targets)``. ``features`` is a numpy array.
            ``targets`` is a numpy array, or ``None`` when no targets
            were given.

    Raises:
        ValueError:
            If ``fit`` is true but no targets were given.
        AssertionError:
            If the target columns differ from those recorded when fitting.
    '''
    has_targets = targets is not None

    # If we're fitting, we record the column names.
    # Otherwise we ensure the targets have the expected columns.
    if fit:
        if not has_targets:
            raise ValueError('preprocess: targets are required when fit=True')
        logger.debug('preprocess: recording columns')
        self.columns = list(targets.columns)
    elif has_targets:
        logger.debug('preprocess: checking columns')
        assert set(targets.columns) == set(self.columns)

    # Drop NaNs and infinities.
    logger.debug('preprocess: dropping NaNs and infinities')
    data = data.replace([np.inf, -np.inf], np.nan).dropna()

    if has_targets:
        targets = targets.replace([np.inf, -np.inf], np.nan).dropna()

        # We only support 1-hour frequencies.
        # For overlapping targets, take the mean.
        logger.debug('preprocess: aggregating targets')
        targets = targets.groupby(targets.index.floor('1h')).mean()

        # Ignore targets at night (optionally).
        if self.daylight_only:
            logger.debug('preprocess: dropping night time targets')
            times = targets.index
            (lat, lon) = self.center
            targets = targets[apollo.is_daylight(times, lat, lon)]

        # The indices for the data and targets may not match.
        # We can only consider their intersection.
        logger.debug('preprocess: joining features and targets')
        index = data.index.intersection(targets.index)
        data = data.loc[index]
        targets = targets.loc[index]
    else:
        index = data.index

    if self.standardize:
        # Scale the feature data.
        logger.debug('preprocess: scaling features')
        cols = list(data.columns)
        raw_data = data[cols].to_numpy()
        if fit:
            self.feature_scaler.fit(raw_data)
        data[cols] = self.feature_scaler.transform(raw_data)

        # Scale the target data.
        if has_targets:
            logger.debug('preprocess: scaling targets')
            cols = self.columns
            raw_targets = targets[cols].to_numpy()
            if fit:
                self.target_scaler.fit(raw_targets)
            targets[cols] = self.target_scaler.transform(raw_targets)

    # Compute additional features (optionally).
    # These are joined after scaling, so they are never standardized.
    if self.add_time_of_day:
        logger.debug('preprocess: computing time-of-day')
        data = data.join(apollo.time_of_day(index))
    if self.add_time_of_year:
        logger.debug('preprocess: computing time-of-year')
        data = data.join(apollo.time_of_year(index))

    # We always return both, even if targets was not given; in that case
    # the second element is None. We must return numpy arrays.
    logger.debug('preprocess: casting to numpy')
    features = data.to_numpy()
    labels = targets.to_numpy() if has_targets else None
    return features, labels