def feature_generation_demo():
    """
    Compare feature-generation strategies on the energy-weather data.

    For every transformation name, a fresh pipeline (Lasso forecaster plus the
    "Katrutsa" feature-selection model) is trained with demo_train, evaluated
    with competition_errors on the train/test file sets, and the resulting MAPE
    summary is printed and appended to the latex report in SAVE_DIR.
    """
    ts_list = load_energy_weather_data(load_raw=False, fnames=TRAIN_FILE_NAMES)
    frc_model = frc_class.CustomModel(Lasso, name="Lasso", alpha=0.0001)
    selector = sel_class.FeatureSelection(name="Katrutsa")

    rewrite = True  # overwrite the latex output only for the first generator
    for fg_name in feature_gnt_names[:-2]:  # :["all"]
        generator = gnt_class.FeatureGeneration(
            name=fg_name, replace=False, transformations=[fg_name], norm=True)

        model, _ = demo_train(ts_list, frc_model=frc_model, fg_mdl=generator,
                              fs_mdl=selector, verbose=True, return_model=True,
                              rewrite=rewrite)
        rewrite = False  # every later result is appended, not overwritten

        train_error, train_std = competition_errors(
            model=model, names=TRAIN_FILE_NAMES, y_idx=TS_IDX)
        test_error, test_std = competition_errors(
            model=model, names=TEST_FILE_NAMES, y_idx=TS_IDX)

        res_text = "\n Average MAPE across time series: train = {} with std {}, test = {} with std {} \\\\ \n".\
            format(train_error, train_std, test_error, test_std)
        print(res_text)
        my_plots.save_to_latex(text=res_text, folder=SAVE_DIR, rewrite=rewrite)
def forecasting_errors(ts, ts_idx):
    """
    Fit a Lasso forecasting pipeline on *ts* and return its errors.

    :param ts: time-series structure to forecast
    :param ts_idx: indices of the series used both as inputs (x) and targets (y)
    :return: tuple (train_mae, train_mape, test_mae, test_mape), each indexed
        by the original time-series index
    """
    data = regression_matrix.RegMatrix(ts, y_idx=ts_idx, x_idx=ts_idx)  # Create regression matrix
    data.create_matrix(nsteps=1, norm_flag=True)

    frc_model = frc_class.CustomModel(Lasso, name="Lasso", alpha=0.001)

    # Split data for training and testing
    data.train_test_split(TRAIN_TEST_RATIO)

    model = frc_class.PipelineModel(gen_mdl=None, sel_mdl=None, frc_mdl=frc_model)
    model, _, _, _ = model.train_model(
        data.trainX, data.trainY)  # model parameters are changed inside
    data.forecast(model, replace=True)

    train_mae = data.mae(idx_rows=data.idx_train, idx_original=data.original_index)
    train_mape = data.mape(idx_rows=data.idx_train, idx_original=data.original_index)
    test_mae = data.mae(idx_rows=data.idx_test, idx_original=data.original_index)
    test_mape = data.mape(idx_rows=data.idx_test, idx_original=data.original_index)
    return train_mae, train_mape, test_mae, test_mape


# Backward-compatible alias: the function was originally (mis)spelled
# "forecating_errors"; keep the old name working for any existing callers.
forecating_errors = forecasting_errors
def main(file_name=None, line_indices="all", header=True, format_="date"):
    """
    Runs forecasting models and reports results in latex file

    :param file_name: file name (.csv) with data in IoT format
    :type file_name: str
    :param line_indices: indices of lines to read from file. Lines are
        enumerated from 1. If "all", read the whole file
    :param header: Specifies if the file contains a header row
    :type header: bool
    :param format_: "date" names the results folder by date only; any other
        value appends the time as well
    :return: latex report
    :rtype: str
    """
    # Init string for latex results:
    latex_str = ""
    time_at_start = time.time()

    if format_ == "date":
        folder = os.path.join(
            "results",
            datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d'))
    else:
        folder = os.path.join(
            "results",
            datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d-%H-%M-%S'))
    if not os.path.exists(folder):
        os.makedirs(folder)

    # Load data in IoT format
    try:
        data, metric_ids, host_ids, header_names = get_iot_data.get_data(
            file_name, line_indices, header)
    except BaseException as e:
        # NOTE: `e.message` does not exist on Python 3 exceptions; format the
        # exception object itself instead.
        print("{}. Line indices: {}. Filename {}".format(
            e, line_indices, file_name))
        return None

    # Select only data from first dataset in host_ids:
    dataset = list(host_ids.keys())[0]  # select the first dataset # FIXIT
    ts = load_time_series.from_iot_to_struct(
        data, host_ids[dataset], dataset)  # get all time series from dataset in TsStruct format
    ts.replace_nans()
    ts.align_time_series(
        max_history=50000)  # truncate time series to align starting and ending points
    latex_str += ts.summarize_ts(latex=True)

    # split time series into train and validation
    train, test = ts.train_test_split(
        train_test_ratio=0.75)  # split raw time series into train and test parts

    # Plot periodics:
    for tsi in ts.data:
        save_to = os.path.join(folder, "decompose", "_".join(tsi.name.split(" ")))
        # infer periodicity and try to decompose ts into tend, seasonality and resid:
        try:
            period, msg = arima_model.decompose(tsi, nhist=500, folder=save_to,
                                                nsplits=50)
        except Exception as e:
            # Same Python 3 fix as above: no `.message` attribute.
            msg = "Failed to decompose, error message: {}".format(e)
        latex_str += my_plots.check_text_for_latex(tsi.name) + ": "
        latex_str += msg
        latex_str += arima_model.make_report(
            save_to, write=False)  # adds figures from "save_to" to latex_str

    # Declare models to compare:
    random_forest = frc_class.CustomModel(RandomForestRegressor, n_jobs=24,
                                          name="RandomForest")
    lasso = frc_class.CustomModel(Lasso, name="Lasso", fit_intercept=True,
                                  alpha=2.0)
    lasso_model = frc_class.PipelineModel(frc_mdl=lasso)
    model_list = [lasso_model]  # random_forest, mixture_experts, lstm

    # Hyperparameter grids per model name.
    params_range = {
        "RandomForest": {"n_estimators": [3000]},
        "Mixture": {"n_hidden_units": [10, 20, 30, 50, 100]},
        "LSTM": {"batch_size": [20, 30, 50, 100]},
        "Lasso": {
            "alpha": [float(i) / 10000 for i in range(1, 11, 1)] + [0.01, 0.05]
        },
    }
    WINDOWS = [2, 5, 7, 10, 15, 20]
    N_FOLDS = 2

    for model in model_list:
        model_save_path = os.path.join(folder, model.name)
        if not os.path.exists(model_save_path):
            os.makedirs(model_save_path)

        # select number of trees and history parameter:
        # (history parameter is divisible by request)
        n_req, params, best_train_mse, plt = train_model_CV(
            train, model, n_fold=N_FOLDS, windows=WINDOWS,
            params=params_range[model.named_steps['frc'].name],
            plot=True)  # windows=[5, 10, 25, 50, 75, 100, 150]
        plt.savefig(os.path.join(model_save_path, "cv_optimization.png"))
        plt.clf()

        opt_string = model.name + ". Best CV error: {0}, estimated parameters: history = {1}, {2} = {3} " \
            "\\\\ \n".format(best_train_mse, n_req,
                             my_plots.check_text_for_latex(list(params.keys())[0]),
                             list(params.values())[0])
        print(opt_string)
        latex_str += opt_string

        # use selected parameters to forecast trainning data:
        if not len(params) == 0:
            model.__setattr__(list(params.keys())[0], list(params.values())[0])

        data = regression_matrix.RegMatrix(ts)
        data.history = n_req * data.request
        data.create_matrix()
        data.train_test_split()
        model, frc, _, _ = model.train_model(data.trainX, data.trainY)
        if hasattr(frc, "msg"):
            latex_str += frc.msg
        if hasattr(frc, "fig"):
            frc.fig.savefig(os.path.join(model_save_path, "fitting.png"))

        train_frc, _ = data.forecast(model, idx_rows=data.idx_train)
        train_mse = mean_squared_error(train_frc, data.trainY)
        test_frc, _ = data.forecast(model, idx_rows=data.idx_test)
        test_mse = mean_squared_error(test_frc, data.testY)

        latex_str += my_plots.check_text_for_latex(model.name) + "\\\\ \n"
        latex_str += "Train error for estimated parameters: {0}, " \
            "test error with estimated parameters {1} \\\\ \n".format(train_mse, test_mse)

        err_all = forecasting_errors(data, ts.original_index)
        column_names = [("MAE", "train"), ("MAPE", "train"),
                        ("MAE", "test"), ("MAPE", "test")]
        res_all = data_frame_res(err_all, column_names, ts)
        print(model.name)
        print(res_all)

        latex_str += res_all.to_latex()
        latex_str += "\\bigskip \n \\\\"
        data.plot_frc(n_frc=10, n_hist=10, folder=model_save_path)
        latex_str += my_plots.include_figures_from_folder(model_save_path)

    total_time = time.time() - time_at_start
    latex_str += "\n Total time: {0}\n \\".format(total_time)
    my_plots.print_to_latex(latex_str, check=False, file_name="IoT_example",
                            folder=folder)
    return latex_str
def main(frc_model=None, generator=None, selector=None):
    """
    Train a forecasting pipeline on each EnergyWeather dataset and report errors.

    :param frc_model: forecasting model (CustomModel); defaults to Lasso
    :param generator: optional feature-generation step of the pipeline
    :param selector: optional feature-selection step of the pipeline
    :return: list of per-dataset pandas DataFrames with MAE/MAPE errors
    """
    # Experiment settings.
    TRAIN_TEST_RATIO = 0.75
    N_PREDICTIONS = 10  # plotting par

    # Load and prepare dataset.
    load_raw = True  # not os.path.exists(os.path.join("ProcessedData", "EnergyWeather_orig_train.pkl"))
    ts_struct_list = load_time_series.load_all_time_series(
        datasets='EnergyWeather', load_raw=load_raw, name_pattern="")

    if frc_model is None:
        # LSTM.LSTM() #frc_class.IdenitityFrc() #LinearRegression()
        frc_model = frc_class.CustomModel(Lasso, name="Lasso", alpha=0.01)

    # Create regression model: generation -> selection -> forecasting.
    model = frc_class.PipelineModel(gen_mdl=generator, sel_mdl=selector,
                                    frc_mdl=frc_model)

    results, res_text = [], []
    for ts in ts_struct_list:
        data = regression_matrix.RegMatrix(ts)  # Create regression matrix
        data.create_matrix(nsteps=1, norm_flag=True)
        # Split data for training and testing
        data.train_test_split(TRAIN_TEST_RATIO)

        # model parameters are changed inside
        model, frc, gen, sel = model.train_model(data.trainX, data.trainY)

        data.forecast(model, data.idx_test, replace=True)
        data.forecast(model, data.idx_train, replace=True)

        row_names = [t.name for t in ts.data]
        error_specs = [
            (data.mae(idx_rows=data.idx_train, idx_original=data.original_index),
             ("MAE", "train")),
            (data.mape(idx_rows=data.idx_train, idx_original=data.original_index),
             ("MAPE", "train")),
            (data.mae(idx_rows=data.idx_test, idx_original=data.original_index),
             ("MAE", "test")),
            (data.mape(idx_rows=data.idx_test, idx_original=data.original_index),
             ("MAPE", "test")),
        ]
        # One column per (metric, split) pair, one row per time series.
        res = pd.concat([pd.DataFrame(values, index=row_names, columns=[col])
                         for values, col in error_specs], axis=1)
        print(res)

        results.append(res)
        res_text.append(ts.name)
        data.plot_frc(n_frc=N_PREDICTIONS)

    my_plots.save_to_latex(results, df_names=res_text)
    return results
def main(file_name, line_indices, header):
    """
    Forecast data simultaneously and separately and compare errors

    :param file_name: file name (.csv) with data in IoT format
    :type file_name: str
    :param line_indices: indices of lines to read from file. Lines are
        enumerated from 1. If "all", read the whole file
    :param header: Specifies if the file contains a header row
    :type header: bool
    :return: forecasting errors
    :rtype: pandas.DataFrame
    """
    TRAIN_TEST_RATIO = 0.75
    N_PREDICTIONS = 10
    VERBOSE = True

    def _error_table(data, names):
        # Collect train/test MAE and MAPE into one table, one row per series.
        # (Was duplicated verbatim for the LSTM and Lasso runs below.)
        columns = [("MAE", "train"), ("MAPE", "train"),
                   ("MAE", "test"), ("MAPE", "test")]
        values = [data.mae(idx_rows=data.idx_train),
                  data.mape(idx_rows=data.idx_train),
                  data.mae(idx_rows=data.idx_test),
                  data.mape(idx_rows=data.idx_test)]
        return pd.concat([pd.DataFrame(v, index=names, columns=[c])
                          for v, c in zip(values, columns)], axis=1)

    ts = utils_.safe_read_iot_data(file_name=file_name,
                                   line_indices=line_indices,
                                   header=header, default="poisson",
                                   verbose=VERBOSE)
    if VERBOSE:
        print(ts.summarize_ts())
        # my_plots.plot_multiple_ts(ts.data, shared_x=True)

    data = regression_matrix.RegMatrix(ts)  # Create regression matrix
    data.create_matrix(nsteps=1, norm_flag=True)
    # Split data for training and testing
    data.train_test_split(TRAIN_TEST_RATIO)
    series_names = [t.name for t in ts.data]

    # --- LSTM, tuned by cross-validation over the hyperparameter grid ---
    hyperpars = {"learning_rate": [2e-6, 2e-5, 2e-4],
                 "n_lstm_units": [20, 30, 40, 50]}
    frc_model = frc_class.CustomModel(LSTM.LSTM, name="LSTM", n_epochs=20,
                                      plot_loss=True)
    model = frc_class.PipelineModel(frc_mdl=frc_model)
    model, frc, _, _ = model.train_model(
        data.trainX, data.trainY, hyperpars=hyperpars,
        n_cvs=5)  # model parameters are changed inside
    if hasattr(frc, "fig"):
        frc.fig.savefig("fitting_learn_rate_{}.png".format(frc.learning_rate))

    # data.forecast returns model obj, forecasted rows of Y matrix and a list
    # [nts] of "flat"/ts indices of forecasted points
    data.forecast(model, replace=True)
    res = _error_table(data, series_names)
    print("LSTM")
    print(res)
    data.plot_frc(n_frc=N_PREDICTIONS)

    # --- Lasso baseline with fixed regularization ---
    frc_model = frc_class.CustomModel(Lasso, name="Lasso", alpha=0.001)
    model = frc_class.PipelineModel(frc_mdl=frc_model)
    model, _, _, _ = model.train_model(data.trainX, data.trainY)
    data.forecast(model, replace=True)
    res = _error_table(data, series_names)
    print("Lasso")
    print(res)
    return res
def main():
    """
    Provides an example of usage of the system.

    The model consists of three main components: feature generation, feature
    selection and forecasting model. Feature generation and selection may be
    empty:
        generation = None
        selection = None
    which is the same as
        generator = gnt_class.FeatureGeneration(name="Identity generator")
        selector = sel_class.FeatureSelection(name="Identity selector", on=False)

    Other options for feature generation:
        generator = gnt_class.FeatureGeneration(name="univariate", replace=False, norm=True
                    transformations=["univariate_transformation", "centroids"])
        generator = gnt_class.Nonparametric()
        generator = gnt_class.Monotone()

    Examples of using sklearn solutions:
        frc_class.CustomModel(PCA, name="Randomized PCA", svd_solver="randomized")
        frc_class.CustomModel(PCA, name="PCA")

    Examples of custom models:
    * Mixture of experts:
        frc_model = frc_class.CustomModel(GatingEnsemble, name="Mixture",
                    estimators=[Lasso(alpha=0.01), Lasso(alpha=0.001)])
    * LSTM network:
        frc_model = frc_class.CustomModel(LSTM.LSTM, name="LSTM")
    """
    # Load and prepare dataset.
    ts_list = load_energy_weather_data()

    # Feature generation step; gnt_class.Monotone() is another option.
    generator = gnt_class.FeatureGeneration(transformations='centroids')

    # Feature selection model can be defined in the same way.
    # If you don't use any, just leave as is.
    selector = sel_class.FeatureSelection(on=False)

    # First argument is your model class, then follow optional parameters as
    # keyword arguments; e.g. frc_class.CustomModel(Lasso, name="Lasso", alpha=0.001).
    frc_model = frc_class.CustomModel(RandomForestRegressor, name="RF")

    # Train the pipeline on the training file set.
    model = demo_train(ts_list, frc_model=frc_model, fg_mdl=generator,
                       fs_mdl=selector, verbose=VERBOSE)

    # Evaluate errors on the train and test sets.
    train_error, train_std = competition_errors(model=model,
                                                names=TRAIN_FILE_NAMES,
                                                y_idx=TS_IDX)
    test_error, test_std = competition_errors(model=model,
                                              names=TEST_FILE_NAMES,
                                              y_idx=TS_IDX)

    summary = ("Average MAPE across time series: train = {} with std {}, "
               "test = {} with std {}").format(train_error, train_std,
                                               test_error, test_std)
    print(summary)
    return train_error, test_error
def demo_train(ts_struct_list, frc_model=None, fg_mdl=None, fs_mdl=None,
               verbose=False, return_model=False, rewrite=True):
    """
    Train and save the model.

    :param ts_struct_list: list of namedtuples tsStruct
    :param frc_model: forecasting model, instance of CustomModel
    :param fg_mdl: feature generation model, instance of FeatureGeneration
    :param fs_mdl: feature selection model, instance of FeatureSelection
    :param verbose: controls the output
    :param return_model: if True, return (model, saved file name) instead of
        only the saved file name
    :param rewrite: passed through to my_plots.save_to_latex
    :return: saved model file name, optionally preceded by the trained model
    """
    # Check arguments, falling back to identity/default components:
    if fg_mdl is None:
        fg_mdl = frc_class.IdentityGenerator(name="Identity generator", on=False)
    if fs_mdl is None:
        # NOTE(review): a FeatureGeneration instance is used here as the
        # *selection* step — presumably it behaves as an identity selector
        # ("IdentityModel(name='Identity selector')"); confirm intent.
        fs_mdl = gnt_class.FeatureGeneration()
    if frc_model is None:
        frc_model = frc_class.CustomModel(Lasso, name="Lasso", alpha=0.01)

    model = frc_class.PipelineModel(gen_mdl=fg_mdl, sel_mdl=fs_mdl,
                                    frc_mdl=frc_model)

    results, res_text = [], []
    for ts in ts_struct_list:
        # Create regression matrix; this fills data.Y, data.X and some other fields.
        data = regression_matrix.RegMatrix(ts, x_idx=TS_IDX, y_idx=TS_IDX)
        data.create_matrix(nsteps=N_STEPS, norm_flag=True)
        # Split data for training and testing.
        data.train_test_split(TRAIN_TEST_RATIO)

        # Train the model. This returns the trained pipeline and its steps.
        model, frc, gen, sel = model.train_model(data.trainX, data.trainY)
        selection_res = "\n Feature selection results: problem status {}, selected {} from {} \\\\ \n".\
            format(sel.status, len(sel.selected), sel.n_vars)

        # Returns forecasted matrix of the same shape as data.Y.
        # frcY, idx_frc = data.forecast(model, idx_rows=data.idx_test) would
        # return forecasts only for data.testY.
        frcY, _ = data.forecast(model)
        data.plot_frc(n_frc=5, n_hist=10, folder=SAVE_DIR)  # saves figures into SAVE_DIR

        index = [ts.data[i].name for i in TS_IDX]
        error_specs = [
            (data.mae(idx_rows=data.idx_train, idx_original=data.original_index),
             ("MAE", "train")),
            (data.mape(idx_rows=data.idx_train, idx_original=data.original_index),
             ("MAPE", "train")),
            (data.mae(idx_rows=data.idx_test, idx_original=data.original_index),
             ("MAE", "test")),
            (data.mape(idx_rows=data.idx_test, idx_original=data.original_index),
             ("MAPE", "test")),
        ]
        res = pd.concat([pd.DataFrame(values, index=index, columns=[col])
                         for values, col in error_specs], axis=1)

        configuration_str = "\n Time series {} forecasted with {} + '{}' feature generation model and " \
                            "'{}' feature selection model \\\\ \n".format(ts.name, frc.name, gen.name, sel.name)
        if verbose:
            print(configuration_str)
            print(selection_res)
            print(res)

        results.append(res)
        res_text.append(configuration_str)
        res_text.append(selection_res)

    saved_mdl_fname = model.save_model(
        file_name=FNAME_PREFIX, folder=SAVE_DIR)  # saving is not an option yet
    # model = frc_class.PipelineModel().load_model(file_name=fname)

    # Write results into a latex file.
    my_plots.save_to_latex(results, df_names=res_text, folder=SAVE_DIR,
                           rewrite=rewrite)
    print("Results saved to folder {}".format(SAVE_DIR))

    if return_model:
        return model, saved_mdl_fname
    return saved_mdl_fname