def create_fitted_model(
    specs: ModelSpecs,
    version: str,  # TODO: throw out?
    regression_frame: Optional[pd.DataFrame] = None,
) -> MODEL_TYPES:
    """
    Create a new fitted model with the given specs.
    """
    if regression_frame is None:
        regression_frame = construct_features(time_range="train", specs=specs)

    # Remove any observation (row) where data is missing.
    # Other parts of the workflow cannot handle missing data,
    # so everything should be verified here.
    regression_frame = regression_frame.dropna(axis=0)
    if regression_frame.empty:
        raise MissingData(
            "Missing data (probably one of the regressors contains no data)"
        )

    x_train = regression_frame.drop(columns=[specs.outcome_var.name])
    y_train = regression_frame[specs.outcome_var.name]

    # Dispatch on the package the model class comes from.
    package_str = specs.model_type.__module__.split(".")[0]
    if package_str == "statsmodels":
        model = specs.model_type(y_train, x_train, **specs.model_params)
        fitted_model = model.fit()
        # Alias the sklearn-style attribute name on the statsmodels result.
        fitted_model.get_params = fitted_model.params
    elif package_str == "sklearn":
        model = specs.model_type(**specs.model_params)
        fitted_model = model.fit(X=x_train, y=y_train)
        # Alias the statsmodels-style attribute name on the sklearn estimator.
        fitted_model.params = fitted_model.get_params
    else:
        raise UnsupportedModel("Unknown model type: %s" % specs.model_type)
    return fitted_model
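
# A minimal sketch (not part of the original module) of why the dropna() above
# works row-wise: if one regressor contains no data at all, dropping
# observations (rows) empties the frame, which the MissingData check then
# catches. The frame used here is hypothetical example data.
def _demo_dropna_behaviour():
    import numpy as np
    import pandas as pd

    frame = pd.DataFrame({"outcome": [1.0, 2.0], "regressor": [np.nan, np.nan]})
    # Dropping rows with missing values leaves nothing, making the problem visible:
    assert frame.dropna(axis=0).empty
    # Dropping columns instead would silently discard the regressor:
    assert not frame.dropna(axis=1).empty
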
def create_fitted_model(
    specs: ModelSpecs,
    version: str,  # TODO: throw out?
    regression_frame: Optional[pd.DataFrame] = None,
) -> MODEL_TYPES:
    """
    Create a new fitted model with the given specs.
    """
    if regression_frame is None:
        regression_frame = construct_features(time_range="train", specs=specs)

    x_train = regression_frame.drop(columns=[specs.outcome_var.name])
    y_train = regression_frame[specs.outcome_var.name]

    if specs.library_name == "statsmodels":
        model = specs.model_type(y_train, x_train, **specs.model_params)
        fitted_model = model.fit()
        # Alias the sklearn-style attribute name on the statsmodels result.
        fitted_model.get_params = fitted_model.params
    elif specs.library_name == "sklearn":
        model = specs.model_type(**specs.model_params)
        fitted_model = model.fit(X=x_train, y=y_train)
        # Alias the statsmodels-style attribute name on the sklearn estimator.
        fitted_model.params = fitted_model.get_params
    else:
        raise UnsupportedModel(
            "Not sure which library your model is based on: %s."
            " See ModelSpecs.set_model." % specs.model_type
        )
    return fitted_model
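
# A minimal, self-contained sketch (not part of the original module) of the
# two fitting conventions the branches above bridge: statsmodels takes the
# data in the model constructor and fit() takes no data, while sklearn
# configures the estimator first and takes the data in fit(). The data here
# is made up.
def _demo_library_conventions():
    import numpy as np
    import statsmodels.api as sm
    from sklearn.linear_model import LinearRegression

    x = np.arange(10, dtype=float).reshape(-1, 1)
    y = 2.0 * x[:, 0] + 1.0

    # statsmodels: endog/exog go into the model constructor.
    sm_fitted = sm.OLS(y, sm.add_constant(x)).fit()
    print(sm_fitted.params)  # fitted coefficients (data, not a method)

    # sklearn: the data goes into fit(); get_params() returns hyperparameters.
    sk_fitted = LinearRegression().fit(X=x, y=y)
    print(sk_fitted.get_params())
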
def make_rolling_forecasts(
    start: datetime,  # start of the forecast period
    end: datetime,  # end of the forecast period (exclusive)
    model_specs: ModelSpecs,
) -> Tuple[pd.Series, ModelState]:
    """
    Repeatedly call make_forecast, for all time steps in the desired time
    window (the end is exclusive). The time window of the specs
    (training + test data) is moved forward step by step, as well.
    Will fail if the series specs do not allocate enough data.
    May create a new model whenever the previous one is outdated.

    Return a pd.Series with the forecasts, as well as the last ModelState.
    """
    # Prepare the time range: default to UTC if no timezone is given.
    if start.tzinfo is None:
        start = start.replace(tzinfo=pytz.utc)
    if end.tzinfo is None:
        end = end.replace(tzinfo=pytz.utc)

    # First, compute one big feature frame, once.
    feature_frame: pd.DataFrame = construct_features(
        (model_specs.start_of_training, end), model_specs
    )

    pd_frequency = timedelta_to_pandas_freq_str(model_specs.frequency)
    values = pd.Series(
        index=pd.date_range(
            start, end, freq=pd_frequency, closed="left", tz=start.tzinfo
        )
    )
    time_step = start
    model = None

    logger.info("Forecasting from %s to %s" % (start, end))
    while time_step < end:
        model, specs = update_model(
            time_step, model, model_specs, feature_frame=feature_frame
        ).split()
        features = feature_frame.loc[time_step:time_step].iloc[:, 1:]
        values[time_step] = make_forecast_for(model_specs, features, model)
        time_step = time_step + model_specs.frequency

    return values, ModelState(model, model_specs)
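
# A minimal, self-contained sketch (not part of the original module) of the
# walk-forward pattern used above: one value is forecast per time step as the
# window moves forward. A naive persistence forecast stands in here for
# update_model/make_forecast_for, and all values are made up.
def _demo_walk_forward():
    from datetime import datetime, timedelta

    import pandas as pd
    import pytz

    start = datetime(2020, 1, 1, tzinfo=pytz.utc)
    end = datetime(2020, 1, 1, 3, tzinfo=pytz.utc)
    frequency = timedelta(hours=1)

    # Known history before the forecast window.
    history = pd.Series(
        [10.0, 11.0, 12.0],
        index=pd.date_range(end=start - frequency, periods=3, freq="1H"),
    )

    # One slot per time step; the end is exclusive.
    periods = int((end - start) / frequency)
    values = pd.Series(
        index=pd.date_range(start, periods=periods, freq="1H"), dtype=float
    )

    time_step = start
    while time_step < end:
        # Persistence forecast: repeat the latest known observation.
        values[time_step] = history.iloc[-1]
        time_step = time_step + frequency
    return values
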
def evaluate_models(
    m1: ModelState, m2: Optional[ModelState] = None, plot_path: Optional[str] = None
):
    """
    Run a model or two against test data and plot results.
    Useful to judge model performance or to compare two models.
    Shows RMSE values, plots error distributions and prints the time it took
    to forecast.

    TODO: support testing m2 next to m1
    """
    fitted_m1, m1_specs = m1.split()

    regression_frame = construct_features(time_range="test", specs=m1_specs)

    x_test = regression_frame.iloc[:, 1:]
    y_test = np.array(regression_frame.iloc[:, 0])

    try:
        # sklearn-style estimators accept the feature frame directly.
        y_hat_test = fitted_m1.predict(x_test)
    except TypeError:
        # statsmodels-style results expect a start/end range plus exogenous data.
        y_hat_test = fitted_m1.predict(
            start=x_test.index[0], end=x_test.index[-1], exog=x_test
        )

    # Back-transform if the data was transformed
    if m1_specs.outcome_var.feature_transformation is not None:
        y_test = m1_specs.outcome_var.feature_transformation.back_transform_value(
            y_test
        )
        y_hat_test = m1_specs.outcome_var.feature_transformation.back_transform_value(
            y_hat_test
        )

    print(
        "rmse = %s"
        % str(round(sm.tools.eval_measures.rmse(y_test, y_hat_test, axis=0), 4))
    )
    plot_true_versus_predicted(
        regression_frame.index, y_test, y_hat_test, None, None, plot_path
    )
    plot_error_graph(y_test, y_hat_test, plot_path=plot_path)
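
# A minimal sketch (not part of the original module) of the back-transform
# step above: if the outcome was, say, log-transformed for fitting, errors
# should be measured on the original scale, so both the truth and the forecast
# are transformed back before computing the RMSE. The log transform here is a
# hypothetical stand-in for a feature_transformation, and the numbers are made up.
def _demo_back_transform_rmse():
    import numpy as np
    from statsmodels.tools.eval_measures import rmse

    y_test_log = np.log(np.array([7.0, 8.5, 10.0]))  # transformed truth
    y_hat_test_log = np.log(np.array([7.5, 8.0, 10.5]))  # transformed forecast

    # Back-transform (the inverse of np.log) before scoring:
    y_test = np.exp(y_test_log)
    y_hat_test = np.exp(y_hat_test_log)
    print("rmse = %s" % round(rmse(y_test, y_hat_test, axis=0), 4))
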