def print_point_statistics(data, models, externalmodels=None, externalforecasts=None, indexers=None):
    ret = "Model & Order & RMSE & SMAPE & Theil's U \\\\ \n"
    for model in models:
        _rmse, _smape, _u = Measures.get_point_statistics(data, model, indexers)
        ret += model.shortname + " & "
        ret += str(model.order) + " & "
        ret += str(_rmse) + " & "
        ret += str(_smape) + " & "
        ret += str(_u)
        ret += " \\\\ \n"
    if externalmodels is not None:
        for k in range(len(externalmodels)):
            ret += externalmodels[k] + " & "
            ret += " 1 & "
            ret += str(round(Measures.rmse(data, externalforecasts[k][:-1]), 2)) + " & "
            ret += str(round(Measures.smape(data, externalforecasts[k][:-1]), 2)) + " & "
            ret += str(round(Measures.UStatistic(data, externalforecasts[k][:-1]), 2))
            ret += " \\\\ \n"
    print(ret)
def forecast_params(data, train_split, method, params, plot=False):
    train, test = sampling.train_test_split(data, train_split)

    fcst = method(train, test, params)

    _output = params['output']
    _step = params.get('step', 1)
    # the first forecast is only available after `order` lags plus `step - 1` samples
    _offset = params['order'] + _step - 1
    yobs = test[_output].iloc[_offset:].values

    if plot:
        plt.figure(figsize=(20, 10))
        plt.plot(yobs)
        plt.plot(fcst)
        plt.show()

    rmse = Measures.rmse(yobs, fcst)
    print("RMSE: ", rmse)
    nrmse = metrics.normalized_rmse(yobs, fcst)
    print("nRMSE: ", nrmse)
    smape = Measures.smape(yobs, fcst)
    print("SMAPE: ", smape)
    u = Measures.UStatistic(yobs, fcst)
    print("U Statistic: ", u)

    return rmse, nrmse, smape, u
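
# Usage sketch for forecast_params. The DataFrame `df`, its 'load' column, and the
# `persistence` baseline below are hypothetical; only the params keys ('output',
# 'order', 'step') mirror what forecast_params actually reads.
def persistence(train, test, params):
    # naive baseline: forecast each sample with the previous observed value
    offset = params['order'] + params.get('step', 1) - 1
    return test[params['output']].shift(1).iloc[offset:].values

rmse, nrmse, smape, u = forecast_params(df, 0.8, persistence,
                                        {'output': 'load', 'order': 1, 'step': 1})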
def print_point_statistics(data, models, externalmodels=None, externalforecasts=None, indexers=None):
    """
    Run point benchmarks on given models and data and print the results

    :param data: test data
    :param models: a list of FTS models to benchmark
    :param externalmodels: a list with benchmark models (façades for other methods)
    :param externalforecasts: a list with the forecasts of the external models for the test data
    :param indexers: a list of seasonal indexers
    :return:
    """
    ret = "Model & Order & RMSE & SMAPE & Theil's U \\\\ \n"
    for model in models:
        _rmse, _smape, _u = Measures.get_point_statistics(data, model, indexers)
        ret += model.shortname + " & "
        ret += str(model.order) + " & "
        ret += str(_rmse) + " & "
        ret += str(_smape) + " & "
        ret += str(_u)
        ret += " \\\\ \n"
    if externalmodels is not None:
        for k in range(len(externalmodels)):
            ret += externalmodels[k] + " & "
            ret += " 1 & "
            ret += str(round(Measures.rmse(data, externalforecasts[k][:-1]), 2)) + " & "
            ret += str(round(Measures.smape(data, externalforecasts[k][:-1]), 2)) + " & "
            ret += str(round(Measures.UStatistic(data, externalforecasts[k][:-1]), 2))
            ret += " \\\\ \n"
    print(ret)
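
# Usage sketch for print_point_statistics with the TAIEX dataset shipped with
# pyFTS; the 4000/1000 train/test split and npart=30 are arbitrary choices.
from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models import chen, hofts

data = TAIEX.get_data()
train, test = data[:4000], data[4000:5000]
part = Grid.GridPartitioner(data=train, npart=30)

models = []
for method in [chen.ConventionalFTS, hofts.HighOrderFTS]:
    model = method(partitioner=part)
    model.fit(train)
    models.append(model)

print_point_statistics(test, models)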
def compareModelsTable(original, models_fo, models_ho):
    """Build a comparison table (matplotlib and LaTeX) of first order and high order models."""
    fig = plt.figure(figsize=[12, 4])
    fig.suptitle("Model comparison")
    columns = ['Model', 'Order', 'Partitions', 'RMSE', 'MAPE (%)']
    rows = []
    for model in models_fo:
        fts = model["model"]
        error_r = Measures.rmse(model["forecasted"], original)
        error_m = round(Measures.mape(model["forecasted"], original) * 100, 2)
        rows.append([model["name"], fts.order, len(fts.sets), error_r, error_m])
    for model in models_ho:
        fts = model["model"]
        error_r = Measures.rmse(model["forecasted"][fts.order:], original[fts.order:])
        error_m = round(Measures.mape(model["forecasted"][fts.order:], original[fts.order:]) * 100, 2)
        rows.append([model["name"], fts.order, len(fts.sets), error_r, error_m])
    ax1 = fig.add_axes([0, 0, 1, 1])  # left, bottom, width, height
    ax1.set_xticks([])
    ax1.set_yticks([])
    ax1.table(cellText=rows, colLabels=columns, cellLoc='center', bbox=[0, 0, 1, 1])
    sup = "\\begin{tabular}{"
    header = ""
    body = ""
    for c in columns:
        sup = sup + "|c"
        if len(header) > 0:
            header = header + " & "
        header = header + "\\textbf{" + c + "} "
    sup = sup + "|} \\hline\n"
    header = header + "\\\\ \\hline \n"
    for r in rows:
        lin = ""
        for c in r:
            if len(lin) > 0:
                lin = lin + " & "
            lin = lin + str(c)
        body = body + lin + "\\\\ \\hline \n"
    return sup + header + body + "\\end{tabular}"
def cluster_method(individual, train, test):
    """Decode a genotype into a partitioner and a WeightedHighOrderFTS, fit on train and evaluate on test."""
    from pyFTS.common import Util, Membership
    from pyFTS.models import hofts
    from pyFTS.partitioners import Grid, Entropy
    from pyFTS.benchmarks import Measures

    # decode the membership function gene; the Gaussian MF is only used
    # when the partitioner is not the Entropy one
    if individual['mf'] == 1:
        mf = Membership.trimf
    elif individual['mf'] == 2:
        mf = Membership.trapmf
    elif individual['mf'] == 3 and individual['partitioner'] != 2:
        mf = Membership.gaussmf
    else:
        mf = Membership.trimf

    # decode the partitioner gene; the Entropy partitioner gets at least 10 partitions
    if individual['partitioner'] == 1:
        partitioner = Grid.GridPartitioner(data=train, npart=individual['npart'], func=mf)
    elif individual['partitioner'] == 2:
        npart = individual['npart'] if individual['npart'] > 10 else 10
        partitioner = Entropy.EntropyPartitioner(data=train, npart=npart, func=mf)

    model = hofts.WeightedHighOrderFTS(partitioner=partitioner,
                                       lags=individual['lags'],
                                       alpha_cut=individual['alpha'],
                                       order=individual['order'])
    model.fit(train)

    rmse, mape, u = Measures.get_point_statistics(test, model)
    size = len(model)

    return individual, rmse, size, mape, u
def cluster_method(individual, dataset, **kwargs):
    """Decode a genotype and evaluate it with a sliding window cross validation over the dataset."""
    from pyFTS.common import Util, Membership
    from pyFTS.models import hofts
    from pyFTS.partitioners import Grid, Entropy
    from pyFTS.benchmarks import Measures
    import numpy as np

    if individual['mf'] == 1:
        mf = Membership.trimf
    elif individual['mf'] == 2:
        mf = Membership.trapmf
    elif individual['mf'] == 3 and individual['partitioner'] != 2:
        mf = Membership.gaussmf
    else:
        mf = Membership.trimf

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    parameters = kwargs.get('parameters', {})

    errors = []
    sizes = []

    for count, train, test in Util.sliding_window(dataset, window_size, train=train_rate, inc=increment_rate):
        if individual['partitioner'] == 1:
            partitioner = Grid.GridPartitioner(data=train, npart=individual['npart'], func=mf)
        elif individual['partitioner'] == 2:
            npart = individual['npart'] if individual['npart'] > 10 else 10
            partitioner = Entropy.EntropyPartitioner(data=train, npart=npart, func=mf)

        model = hofts.WeightedHighOrderFTS(partitioner=partitioner,
                                           lags=individual['lags'],
                                           alpha_cut=individual['alpha'],
                                           order=individual['order'])
        model.fit(train)

        forecasts = model.predict(test)

        rmse = Measures.rmse(test[model.max_lag:], forecasts)
        size = len(model)

        errors.append(rmse)
        sizes.append(size)

    return {
        'parameters': individual,
        'rmse': np.nanmean(errors),
        'size': np.nanmean(sizes)  # fixed: average over all windows, not only the last one
    }
def evaluation1(dataset, individual):
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    try:
        results = []
        lengths = []

        for count, train, test in Util.sliding_window(dataset, 800, train=.8, inc=.25):
            model = phenotype(individual, train)

            if model is None:
                return None

            rmse, _, _ = Measures.get_point_statistics(test, model)
            lengths.append(len(model))
            results.append(rmse)

        _lags = sum(model.lags) * 100

        # accuracy fitness: mean RMSE penalized by its dispersion across windows
        rmse = np.nansum([.6 * np.nanmean(results), .4 * np.nanstd(results)])
        # parsimony fitness: model size combined with the width of the lag set
        len_lags = np.nansum([.4 * np.nanmean(lengths), .6 * _lags])

        return len_lags, rmse
    except Exception as ex:
        print("EXCEPTION!", str(ex), str(individual))
        return np.inf, np.inf  # fixed: penalize both fitness values, keeping the tuple shape
def forecast_best_params(data, train_split, method_id, method, space, plot=False, save=False):
    print("Running experiment ", method_id)

    # load the best hyperparameters found by the search and decode them with hyperopt's space_eval
    best = pickle.load(open("best_" + method_id + ".pkl", "rb"))
    train, test = sampling.train_test_split(data, train_split)
    best_params = space_eval(space, best)

    fcst = method(train, test, best_params)

    _order = best_params['order']
    _output = best_params['output']
    yobs = test[_output].iloc[_order:].values

    if plot:
        plt.figure(figsize=(20, 10))
        plt.plot(yobs)
        plt.plot(fcst)
        plt.show()

    rmse = Measures.rmse(yobs, fcst)
    print("RMSE: ", rmse)
    nrmse = metrics.normalized_rmse(yobs, fcst)
    print("nRMSE: ", nrmse)
    smape = Measures.smape(yobs, fcst)
    print("SMAPE: ", smape)
    u = Measures.UStatistic(yobs, fcst)
    print("U Statistic: ", u)

    if save:
        results = {"method_id": method_id, "forecast": fcst,
                   "RMSE": rmse, "SMAPE": smape, "U": u}
        pickle.dump(results, open("results_" + method_id + ".pkl", "wb"))

    return rmse, nrmse, smape, u
def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
    """
    Run the interval forecasting benchmarks

    :param mfts: FTS model
    :param partitioner: Universe of Discourse partitioner
    :param train_data: data used to train the model
    :param test_data: data used to test the model
    :param window_key: id of the sliding window
    :param transformation: data transformation
    :param indexer: seasonal indexer
    :return: a dictionary with the benchmark results
    """
    import time
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, arima, quantreg

    # throwaway references that keep the imported types available when this
    # function is serialized and dispatched to cluster nodes
    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS]
    tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
    tmp4 = [arima.ARIMA, quantreg.QuantileRegression]
    tmp3 = [Measures.get_interval_statistics]

    steps_ahead = kwargs.get('steps_ahead', 1)
    method = kwargs.get('method', None)

    if mfts.benchmark_only:
        _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
    else:
        pttr = str(partitioner.__module__).split('.')[-1]
        _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
        mfts.partitioner = partitioner
        mfts.append_transformation(partitioner.transformation)

    _key += str(steps_ahead)
    _key += str(method) if method is not None else ""

    _start = time.time()
    mfts.fit(train_data, **kwargs)
    _end = time.time()
    times = _end - _start

    _start = time.time()
    metrics = Measures.get_interval_statistics(test_data, mfts, **kwargs)
    _end = time.time()
    times += _end - _start

    ret = {'key': _key, 'obj': mfts, 'sharpness': metrics[0], 'resolution': metrics[1],
           'coverage': metrics[2], 'time': times, 'Q05': metrics[3], 'Q25': metrics[4],
           'Q75': metrics[5], 'Q95': metrics[6], 'winkler05': metrics[7], 'winkler25': metrics[8],
           'window': window_key, 'steps': steps_ahead, 'method': method}

    return ret
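
# Usage sketch for run_interval, assuming an interval-capable model such as
# ifts.IntervalFTS; window_key only labels the result row.
from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models import ifts

data = TAIEX.get_data()
train, test = data[:4000], data[4000:5000]
part = Grid.GridPartitioner(data=train, npart=30)

result = run_interval(ifts.IntervalFTS(partitioner=part), part, train, test, window_key=0)
print(result['key'], result['sharpness'], result['coverage'])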
def evaluate(dataset, individual, **kwargs): """ Evaluate an individual using a sliding window cross validation over the dataset. :param dataset: Evaluation dataset :param individual: genotype to be tested :param window_size: The length of scrolling window for train/test on dataset :param train_rate: The train/test split ([0,1]) :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1]) :param parameters: dict with model specific arguments for fit method. :return: a tuple (len_lags, rmse) with the parsimony fitness value and the accuracy fitness value """ from pyFTS.models import hofts, ifts, pwfts from pyFTS.common import Util from pyFTS.benchmarks import Measures from pyFTS.hyperparam.Evolutionary import phenotype, __measures import numpy as np window_size = kwargs.get('window_size', 800) train_rate = kwargs.get('train_rate', .8) increment_rate = kwargs.get('increment_rate', .2) fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS) parameters = kwargs.get('parameters', {}) if individual['f1'] is not None and individual['f2'] is not None: return {key: individual[key] for key in __measures} errors = [] lengths = [] for count, train, test in Util.sliding_window(dataset, window_size, train=train_rate, inc=increment_rate): model = phenotype(individual, train, fts_method=fts_method, parameters=parameters) forecasts = model.predict(test) rmse = Measures.rmse(test[model.max_lag:], forecasts[:-1]) lengths.append(len(model)) errors.append(rmse) _lags = sum(model.lags) * 100 _rmse = np.nanmean(errors) _len = np.nanmean(lengths) f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)]) f2 = np.nansum([.4 * _len, .6 * _lags]) return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len}
def evaluate_individual_model(model, partitioner, train, test, window_size, time_displacement):
    import numpy as np
    from pyFTS.partitioners import Grid
    from pyFTS.benchmarks import Measures

    try:
        model.train(train, sets=partitioner.sets, order=model.order, parameters=window_size)
        forecasts = model.forecast(test, time_displacement=time_displacement, window_size=window_size)
        _rmse = Measures.rmse(test[model.order:], forecasts[:-1])
        _mape = Measures.mape(test[model.order:], forecasts[:-1])
        _u = Measures.UStatistic(test[model.order:], forecasts[:-1])
    except Exception as e:
        print(e)
        _rmse = np.nan
        _mape = np.nan
        _u = np.nan

    return {'model': model.shortname, 'partitions': partitioner.partitions, 'order': model.order,
            'rmse': _rmse, 'mape': _mape, 'u': _u}
def print_distribution_statistics(original, models, steps, resolution):
    ret = "Model & Order & Interval & Distribution \\\\ \n"
    for fts in models:
        _crps1, _crps2, _t1, _t2 = Measures.get_distribution_statistics(original, fts, steps, resolution)
        ret += fts.shortname + " & "
        ret += str(fts.order) + " & "
        ret += str(_crps1) + " & "
        ret += str(_crps2) + " \\\\ \n"
    print(ret)
def rolling_window_forecast_params(data, train_percent, window_size, method, params):
    # get the distinct days present in the data
    training_days = pd.unique(data.index.date)

    fcst = []
    yobs = []

    for day in training_days:
        print("Processing :", day)
        daily_data = data[data.index.date == day]
        nsamples = len(daily_data.index)
        train_size = round(nsamples * train_percent)

        test_end = 0
        index = 0

        while test_end < nsamples:
            train_start, train_end, test_start, test_end = get_data_index(index, train_size,
                                                                          window_size, nsamples)
            # fixed: slice within the current day, since the indices are relative to daily_data
            train = daily_data[train_start:train_end]
            test = daily_data[test_start:test_end]
            index += window_size

            f = method(train, test, params)
            fcst.extend(f)

            _step = params.get('step', 1)
            _output = params['output']
            _offset = params['order'] + _step - 1
            yobs.extend(test[_output].iloc[_offset:].values)

    rmse = Measures.rmse(yobs, fcst)
    print("RMSE: ", rmse)
    nrmse = metrics.normalized_rmse(yobs, fcst)
    print("nRMSE: ", nrmse)
    smape = Measures.smape(yobs, fcst)
    print("SMAPE: ", smape)
    u = Measures.UStatistic(yobs, fcst)
    print("U Statistic: ", u)

    return rmse, nrmse, smape, u
def evaluate(dataset, individual, **kwargs): """ Evaluate an individual using a sliding window cross validation over the dataset. :param dataset: Evaluation dataset :param individual: genotype to be tested :param window_size: The length of scrolling window for train/test on dataset :param train_rate: The train/test split ([0,1]) :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1]) :param parameters: dict with model specific arguments for fit method. :return: a tuple (len_lags, rmse) with the parsimony fitness value and the accuracy fitness value """ from pyFTS.common import Util from pyFTS.benchmarks import Measures from pyFTS.fcm.GA import phenotype import numpy as np window_size = kwargs.get('window_size', 800) train_rate = kwargs.get('train_rate', .8) increment_rate = kwargs.get('increment_rate', .2) #parameters = kwargs.get('parameters',{}) errors = [] for count, train, test in Util.sliding_window(dataset, window_size, train=train_rate, inc=increment_rate): model = phenotype(individual, train) if model is None: raise Exception("Phenotype returned None") model.uod_clip = False forecasts = model.predict(test) rmse = Measures.rmse( test[model.max_lag:], forecasts[:-1]) #.get_point_statistics(test, model) errors.append(rmse) _rmse = np.nanmean(errors) _std = np.nanstd(errors) #print("EVALUATION {}".format(individual)) return {'rmse': .6 * _rmse + .4 * _std}
def print_interval_statistics(original, models):
    ret = "Model & Order & Sharpness & Resolution & Coverage & .05 & .25 & .75 & .95 \\\\ \n"
    for fts in models:
        _sharp, _res, _cov, _q5, _q25, _q75, _q95 = Measures.get_interval_statistics(original, fts)
        ret += fts.shortname + " & "
        ret += str(fts.order) + " & "
        ret += str(_sharp) + " & "
        ret += str(_res) + " & "
        ret += str(_cov) + " & "
        ret += str(_q5) + " & "
        ret += str(_q25) + " & "
        ret += str(_q75) + " & "
        ret += str(_q95) + " \\\\ \n"
    print(ret)
def compare_residuals(data, models):
    """
    Compare the residuals statistics of several models

    :param data: test data
    :param models: a list of fitted FTS models
    :return: a string with a LaTeX tabular containing the results
    """
    ret = "Model & Order & Mean & STD & Box-Pierce & Box-Ljung & P-value \\\\ \n"
    for mfts in models:
        forecasts = mfts.forecast(data)
        res = residuals(data, forecasts, mfts.order)
        mu = np.mean(res)
        sig = np.std(res)
        ret += mfts.shortname + " & "
        ret += str(mfts.order) + " & "
        ret += str(round(mu, 2)) + " & "
        ret += str(round(sig, 2)) + " & "
        q1 = Measures.BoxPierceStatistic(res, 10)
        ret += str(round(q1, 2)) + " & "
        q2 = Measures.BoxLjungStatistic(res, 10)
        ret += str(round(q2, 2)) + " & "
        ret += str(chi_squared(q2, 10))
        ret += " \\\\ \n"
    return ret
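
# Usage sketch for compare_residuals; `residuals` and `chi_squared` are assumed
# to be helpers defined in the same module as this function.
from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models import hofts

data = TAIEX.get_data()
train, test = data[:4000], data[4000:5000]
part = Grid.GridPartitioner(data=train, npart=30)

model = hofts.HighOrderFTS(partitioner=part, order=2)
model.fit(train)

print(compare_residuals(test, [model]))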
def print_distribution_statistics(original, models, steps, resolution):
    """
    Run probabilistic benchmarks on given models and data and print the results

    :param original: test data
    :param models: a list of FTS models to benchmark
    :param steps: the forecasting horizon (number of steps ahead)
    :param resolution: the resolution of the forecast probability distributions
    :return:
    """
    ret = "Model & Order & Interval & Distribution \\\\ \n"
    for fts in models:
        _crps1, _crps2, _t1, _t2 = Measures.get_distribution_statistics(original, fts, steps, resolution)
        ret += fts.shortname + " & "
        ret += str(fts.order) + " & "
        ret += str(_crps1) + " & "
        ret += str(_crps2) + " \\\\ \n"
    print(ret)
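
# Usage sketch for print_distribution_statistics with a probabilistic model;
# steps=20 and resolution=100 are illustrative values, not defaults.
from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models import pwfts

data = TAIEX.get_data()
train, test = data[:4000], data[4000:5000]
part = Grid.GridPartitioner(data=train, npart=30)

model = pwfts.ProbabilisticWeightedFTS(partitioner=part, order=2)
model.fit(train)

print_distribution_statistics(test, [model], 20, 100)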
def print_interval_statistics(original, models):
    """
    Run interval benchmarks on given models and data and print the results

    :param original: test data
    :param models: a list of FTS models to benchmark
    :return:
    """
    ret = "Model & Order & Sharpness & Resolution & Coverage & .05 & .25 & .75 & .95 \\\\ \n"
    for fts in models:
        _sharp, _res, _cov, _q5, _q25, _q75, _q95 = Measures.get_interval_statistics(original, fts)
        ret += fts.shortname + " & "
        ret += str(fts.order) + " & "
        ret += str(_sharp) + " & "
        ret += str(_res) + " & "
        ret += str(_cov) + " & "
        ret += str(_q5) + " & "
        ret += str(_q25) + " & "
        ret += str(_q75) + " & "
        ret += str(_q95) + " \\\\ \n"
    print(ret)
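
# Usage sketch for print_interval_statistics, reusing the interval-capable
# models named elsewhere in this section (IntervalFTS, ProbabilisticWeightedFTS).
from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models import ifts

data = TAIEX.get_data()
train, test = data[:4000], data[4000:5000]
part = Grid.GridPartitioner(data=train, npart=30)

model = ifts.IntervalFTS(partitioner=part)
model.fit(train)

print_interval_statistics(test, [model])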
def rolling_window_benchmark(data, train=0.8, **kwargs):
    resample = __pop('resample', None, kwargs)
    output = __pop('output', None, kwargs)

    if resample:
        data = sampling.resample_data(data, resample)

    train_data, test_data = sampling.train_test_split(data, train)

    methods = __pop('methods', None, kwargs)
    orders = __pop("orders", [1, 2, 3], kwargs)
    steps_ahead = __pop('steps_ahead', [1], kwargs)

    for method in methods:
        for order in orders:
            for step in steps_ahead:
                m = method()
                if isinstance(m, fts.FTS):
                    partitioners = __pop("partitioners", [Grid.GridPartitioner], kwargs)
                    partitions = __pop("partitions", [10], kwargs)
                    for partitioner in partitioners:
                        for partition in partitions:
                            data_train_fs = partitioner(data=train_data, npart=partition)
                            m.partitioner = data_train_fs
                            # TODO: measure the training time
                            m.fit(train_data, **kwargs)
                            # TODO: measure the forecasting time
                            yhat = m.predict(test_data)  # fixed: predict requires the test data
                            # TODO: implement the remaining evaluation metrics
                            _rmse = Measures.rmse(test_data[output].iloc[order:], yhat[:-step])
                            print("RMSE: ", _rmse)
from pyFTS.benchmarks import Measures
from pyFTS.partitioners import Grid, Entropy
from pyFTS.models import hofts
from pyFTS.common import Membership

x = [k for k in np.arange(-2 * np.pi, 2 * np.pi, 0.1)]
y = [np.sin(k) for k in x]

rows = []

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 5])
ax.plot(y, label='Original', color='black')

for npart in np.arange(5, 35, 5):
    part = Grid.GridPartitioner(data=y, npart=npart)
    model = hofts.HighOrderFTS(order=1, partitioner=part)
    model.fit(y)
    forecasts = model.predict(y)
    ax.plot(forecasts[:-1], label=str(npart) + " partitions")

    rmse, mape, u = Measures.get_point_statistics(y, model)
    rows.append([npart, rmse, mape, u])

handles, labels = ax.get_legend_handles_labels()
lgd = ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))

df = pd.DataFrame(rows, columns=['Partitions', 'RMSE', 'MAPE', 'U'])
def normalized_rmse(targets, forecasts):
    """RMSE normalized by the mean of the target series: nRMSE = RMSE / mean(targets)."""
    if isinstance(targets, list):
        targets = np.array(targets)
    return Measures.rmse(targets, forecasts) / np.nanmean(targets)
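
# Minimal check of normalized_rmse: for targets [10, 20, 30] and forecasts
# [12, 18, 33], the RMSE is sqrt((4 + 4 + 9) / 3) ≈ 2.38 and the target mean is 20,
# so the expected nRMSE is ≈ 0.119.
print(normalized_rmse([10, 20, 30], [12, 18, 33]))  # ≈ 0.119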
def evaluate(dataset, individual, **kwargs): """ Evaluate an individual using a sliding window cross validation over the dataset. :param dataset: Evaluation dataset :param individual: genotype to be tested :param window_size: The length of scrolling window for train/test on dataset :param train_rate: The train/test split ([0,1]) :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1]) :param parameters: dict with model specific arguments for fit method. :return: a tuple (len_lags, rmse) with the parsimony fitness value and the accuracy fitness value """ import logging from pyFTS.models import hofts, ifts, pwfts from pyFTS.common import Util from pyFTS.benchmarks import Measures from pyFTS.hyperparam.Evolutionary import __measures from pyFTS.hyperparam.mvfts import phenotype from pyFTS.models.multivariate import mvfts, wmvfts, partitioner, variable, cmvfts, grid, granular, common import numpy as np window_size = kwargs.get('window_size', 800) train_rate = kwargs.get('train_rate', .8) increment_rate = kwargs.get('increment_rate', .2) fts_method = kwargs.get('fts_method', wmvfts.WeightedMVFTS) parameters = kwargs.get('parameters', {}) tvar = kwargs.get('target_variable', None) if individual['f1'] is not None and individual['f2'] is not None: return {key: individual[key] for key in __measures} errors = [] lengths = [] kwargs2 = kwargs.copy() kwargs2.pop('fts_method') if 'parameters' in kwargs2: kwargs2.pop('parameters') for count, train, test in Util.sliding_window(dataset, window_size, train=train_rate, inc=increment_rate): try: model = phenotype(individual, train, fts_method=fts_method, parameters=parameters, **kwargs2) forecasts = model.predict(test) rmse = Measures.rmse( test[tvar['data_label']].values[model.max_lag:], forecasts[:-1]) lengths.append(len(model)) errors.append(rmse) except Exception as ex: logging.exception("Error") lengths.append(np.nan) errors.append(np.nan) try: _rmse = np.nanmean(errors) _len = np.nanmean(lengths) f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)]) f2 = np.nansum([.9 * _len, .1 * np.nanstd(lengths)]) return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len} except Exception as ex: logging.exception("Error") return {'f1': np.inf, 'f2': np.inf, 'rmse': np.inf, 'size': np.inf}
from pyFTS.data import TAIEX, SP500, NASDAQ, Malaysia

dataset = Malaysia.get_data('temperature')[:1000]

p = Grid.GridPartitioner(data=dataset, npart=20)

print(p)

model = hofts.WeightedHighOrderFTS(partitioner=p, order=2)
model.fit(dataset)  #[22, 22, 23, 23, 24])

print(model)

Measures.get_point_statistics(dataset, model)

'''
#dataset = SP500.get_data()[11500:16000]
#dataset = NASDAQ.get_data()
#print(len(dataset))

bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
                                methods=[chen.ConventionalFTS],  #[pwfts.ProbabilisticWeightedFTS],
                                benchmark_models=False,
                                transformations=[None],
                                #orders=[1, 2, 3],
                                partitions=np.arange(10, 100, 2),
                                progress=False, type="point",
                                #steps_ahead=[1,2,4,6,8,10],
                                distributed=False,
                                nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
    """
    Probabilistic forecast benchmark function to be executed on cluster nodes

    :param mfts: FTS model
    :param partitioner: Universe of Discourse partitioner
    :param train_data: data used to train the model
    :param test_data: data used to test the model
    :param steps: the forecasting horizon (number of steps ahead)
    :param resolution: the resolution of the forecast probability distributions
    :param window_key: id of the sliding window
    :param transformation: data transformation
    :param indexer: seasonal indexer
    :return: a dictionary with the benchmark results
    """
    import time
    import numpy as np
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.models.ensemble import ensemble
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, arima, quantreg, knn
    from pyFTS.models.seasonal import SeasonalIndexer

    # throwaway references that keep the imported types available when this
    # function is serialized and dispatched to cluster nodes
    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS,
           arima.ARIMA, ensemble.AllMethodEnsembleFTS, knn.KNearestNeighbors]
    tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
    tmp3 = [Measures.get_distribution_statistics, SeasonalIndexer.SeasonalIndexer,
            SeasonalIndexer.LinearSeasonalIndexer]

    indexer = kwargs.get('indexer', None)
    steps_ahead = kwargs.get('steps_ahead', 1)
    method = kwargs.get('method', None)

    if mfts.benchmark_only:
        _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
    else:
        pttr = str(partitioner.__module__).split('.')[-1]
        _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
        mfts.partitioner = partitioner
        mfts.append_transformation(partitioner.transformation)

    _key += str(steps_ahead)
    _key += str(method) if method is not None else ""

    if mfts.has_seasonality:
        mfts.indexer = indexer

    _start = time.time()
    mfts.fit(train_data, **kwargs)
    _end = time.time()
    times = _end - _start

    _crps1, _t1, _brier = Measures.get_distribution_statistics(test_data, mfts, **kwargs)
    _t1 += times

    ret = {'key': _key, 'obj': mfts, 'CRPS': _crps1, 'time': _t1, 'brier': _brier,
           'window': window_key, 'steps': steps_ahead, 'method': method}

    return ret
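
# Usage sketch for run_probabilistic, mirroring the run_interval example above;
# ProbabilisticWeightedFTS is the probabilistic model named in this section.
from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models import pwfts

data = TAIEX.get_data()
train, test = data[:4000], data[4000:5000]
part = Grid.GridPartitioner(data=train, npart=30)

result = run_probabilistic(pwfts.ProbabilisticWeightedFTS(partitioner=part, order=2),
                           part, train, test, window_key=0)
print(result['key'], result['CRPS'], result['brier'])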
def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
    """
    Point forecast benchmark function to be executed on cluster nodes

    :param mfts: FTS model
    :param partitioner: Universe of Discourse partitioner
    :param train_data: data used to train the model
    :param test_data: data used to test the model
    :param window_key: id of the sliding window
    :param transformation: data transformation
    :param indexer: seasonal indexer
    :return: a dictionary with the benchmark results
    """
    import time
    from pyFTS.models import yu, chen, hofts, pwfts, ismailefendi, sadaei, song, cheng, hwang
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, naive, arima, quantreg
    from pyFTS.common import Transformations

    # throwaway references that keep the imported types available when this
    # function is serialized and dispatched to cluster nodes
    tmp = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS,
           ismailefendi.ImprovedWeightedFTS, cheng.TrendWeightedFTS,
           sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS,
           hwang.HighOrderFTS, pwfts.ProbabilisticWeightedFTS]
    tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
    tmp4 = [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]
    tmp3 = [Measures.get_point_statistics]
    tmp5 = [Transformations.Differential]

    indexer = kwargs.get('indexer', None)
    steps_ahead = kwargs.get('steps_ahead', 1)
    method = kwargs.get('method', None)

    if mfts.benchmark_only:
        _key = mfts.shortname + str(mfts.order if mfts.order is not None else "")
    else:
        pttr = str(partitioner.__module__).split('.')[-1]
        _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
        mfts.partitioner = partitioner
        mfts.append_transformation(partitioner.transformation)

    _key += str(steps_ahead)
    _key += str(method) if method is not None else ""

    _start = time.time()
    mfts.fit(train_data, **kwargs)
    _end = time.time()
    times = _end - _start

    _start = time.time()
    _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, **kwargs)
    _end = time.time()
    times += _end - _start

    ret = {'key': _key, 'obj': mfts, 'rmse': _rmse, 'smape': _smape, 'u': _u,
           'time': times, 'window': window_key, 'steps': steps_ahead, 'method': method}

    return ret
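
# Usage sketch for run_point, following the same pattern as run_interval and
# run_probabilistic above.
from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models import hofts

data = TAIEX.get_data()
train, test = data[:4000], data[4000:5000]
part = Grid.GridPartitioner(data=train, npart=30)

result = run_point(hofts.HighOrderFTS(partitioner=part, order=2), part, train, test, window_key=0)
print(result['key'], result['rmse'], result['smape'], result['u'])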
def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=None, tam=[10, 15],
                      plotforecasts=False, elev=30, azim=144, intervals=False, parameters=None,
                      partitioner=Grid.GridPartitioner, transformation=None, indexer=None):
    _3d = len(orders) > 1
    ret = []
    if _3d:
        errors = np.array([[0 for k in range(len(partitions))] for kk in range(len(orders))])
    else:
        errors = []
    forecasted_best = []
    fig = plt.figure(figsize=tam)
    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(train)])
        ax0.set_ylim([min(train) * 0.9, max(train) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')
    min_rmse = 1000000.0
    best = None

    for pc, p in enumerate(partitions, start=0):
        sets = partitioner(data=train, npart=p, transformation=transformation).sets
        for oc, o in enumerate(orders, start=0):
            fts = model("q = " + str(p) + " n = " + str(o))
            fts.append_transformation(transformation)
            fts.train(train, sets=sets, order=o, parameters=parameters)
            if not intervals:
                forecasted = fts.forecast(test)
                if not fts.has_seasonality:
                    error = Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1]))
                else:
                    error = Measures.rmse(np.array(test[o:]), np.array(forecasted))
                for kk in range(o):
                    forecasted.insert(0, None)
                if plotforecasts:
                    ax0.plot(forecasted, label=fts.name)
            else:
                forecasted = fts.forecast_interval(test)
                error = 1.0 - Measures.rmse_interval(np.array(test[o:]), np.array(forecasted[:-1]))
            if _3d:
                errors[oc, pc] = error
            else:
                errors.append(error)
            if error < min_rmse:
                min_rmse = error
                best = fts
                forecasted_best = forecasted

    if plotforecasts:
        ax0.plot(test, label="Original", linewidth=3.0, color="black")
        if _3d:
            ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
    if _3d and not plotforecasts:
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
    if _3d:
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        surf = ax1.plot_surface(X, Y, errors, rstride=1, cstride=1, antialiased=True)
    else:
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_xlabel('Number of partitions')
        ax1.set_ylabel('RMSE')
        ax1.plot(partitions, errors)

    ret.append(best)
    ret.append(forecasted_best)
    ret.append(min_rmse)

    cUtil.show_and_save_image(fig, file, save)

    return ret
df = loader.series_to_supervised(signals[key], n_in=_order, n_out=1)
data_input = df.iloc[:, :_order].values
data_output = df.iloc[:, -1].values

l = len(df.index)
limit = l // 2

train = data_input[:limit]
test = data_input[limit:]

ax[row].plot(data_output[limit + _order:], label="Original")
ax[row].set_title(key)

# persistence baseline: forecast each sample with the previous observed value
persistence_forecast = data_output[limit + _order - 1:-1]
ax[row].plot(persistence_forecast, label="Persistence")
_rmse = Measures.rmse(data_output[limit + _order:], persistence_forecast)
data = [key, "Persistence", _rmse]
rows.append(data)  # fixed: record the persistence result, as done for the other models

evolving_model = evolvingclusterfts.EvolvingClusterFTS(defuzzy='weighted',
                                                       membership_threshold=0.6,
                                                       variance_limit=0.001)
evolving_model.fit(train, order=_order)
y_hat_df = pd.DataFrame(evolving_model.predict(test))
forecasts = y_hat_df.iloc[:, -1].values
ax[row].plot(forecasts, label="EvolvingFTS")
_rmse = Measures.rmse(data_output[limit + _order:], forecasts[:-1])
data = [key, "EvolvingFTS", _rmse]
rows.append(data)

fbem_model = FBeM.FBeM()
fbem_model.n = _order
fbem_model.fit(train, order=_order)
def sliding_window_simple_search(data, windowsize, model, partitions, orders, **kwargs):
    _3d = len(orders) > 1
    ret = []
    errors = np.array([[0 for k in range(len(partitions))] for kk in range(len(orders))])
    forecasted_best = []

    figsize = kwargs.get('figsize', [10, 15])
    fig = plt.figure(figsize=figsize)

    plotforecasts = kwargs.get('plotforecasts', False)
    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(data)])
        ax0.set_ylim([min(data) * 0.9, max(data) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')

    min_rmse = 1000000.0
    best = None

    intervals = kwargs.get('intervals', False)
    threshold = kwargs.get('threshold', 0.5)

    progressbar = kwargs.get('progressbar', None)

    rng1 = enumerate(partitions, start=0)
    if progressbar:
        from tqdm import tqdm
        rng1 = enumerate(tqdm(partitions), start=0)

    for pc, p in rng1:
        fs = Grid.GridPartitioner(data=data, npart=p)

        rng2 = enumerate(orders, start=0)
        if progressbar:
            rng2 = enumerate(tqdm(orders), start=0)

        for oc, o in rng2:
            _error = []
            for ct, train, test in Util.sliding_window(data, windowsize, 0.8, **kwargs):
                fts = model("q = " + str(p) + " n = " + str(o), partitioner=fs)
                fts.fit(train, order=o)
                if not intervals:
                    forecasted = fts.forecast(test)
                    if not fts.has_seasonality:
                        _error.append(Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1])))
                    else:
                        _error.append(Measures.rmse(np.array(test[o:]), np.array(forecasted)))
                    for kk in range(o):
                        forecasted.insert(0, None)
                    if plotforecasts:
                        ax0.plot(forecasted, label=fts.name)
                else:
                    forecasted = fts.forecast_interval(test)
                    _error.append(1.0 - Measures.rmse_interval(np.array(test[o:]), np.array(forecasted[:-1])))
            error = np.nanmean(_error)
            errors[oc, pc] = error
            if (min_rmse - error) > threshold:
                min_rmse = error
                best = fts
                forecasted_best = forecasted

    if plotforecasts:
        ax0.plot(test, label="Original", linewidth=3.0, color="black")

    elev = kwargs.get('elev', 30)
    azim = kwargs.get('azim', 144)

    if _3d:
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        surf = ax1.plot_surface(X, Y, errors, rstride=1, cstride=1, antialiased=True)
    else:
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_xlabel('Number of partitions')
        ax1.set_ylabel('RMSE')
        ax1.plot(partitions, errors[0])  # fixed: plot the error curve on ax1, with axes in the usual orientation

    ret.append(best)
    ret.append(forecasted_best)

    file = kwargs.get('file', None)
    save = kwargs.get('save', False)
    Util.show_and_save_image(fig, file, save)

    return ret
def SelecaoSimples_MenorRMSE(original, parameters, modelo):
    ret = []
    errors = []
    forecasted_best = []
    print("Original series")
    fig = plt.figure(figsize=[20, 12])
    fig.suptitle("Model comparison")
    ax0 = fig.add_axes([0, 0.5, 0.65, 0.45])  # left, bottom, width, height
    ax0.set_xlim([0, len(original)])
    ax0.set_ylim([min(original), max(original)])
    ax0.set_title('Time series')
    ax0.set_ylabel('F(T)')
    ax0.set_xlabel('T')
    ax0.plot(original, label="Original")
    min_rmse = 100000.0
    best = None

    for p in parameters:
        sets = Grid.GridPartitioner(data=original, npart=p).sets
        fts = modelo(str(p) + " partitions")
        fts.train(original, sets=sets)
        forecasted = fts.forecast(original)
        forecasted.insert(0, original[0])
        ax0.plot(forecasted, label=fts.name)
        error = Measures.rmse(np.array(forecasted), np.array(original))
        print(p, error)
        errors.append(error)
        if error < min_rmse:
            min_rmse = error
            best = fts
            forecasted_best = forecasted

    handles0, labels0 = ax0.get_legend_handles_labels()
    ax0.legend(handles0, labels0)
    ax1 = fig.add_axes([0.7, 0.5, 0.3, 0.45])  # left, bottom, width, height
    ax1.set_title('RMSE comparison')
    ax1.set_ylabel('RMSE')
    ax1.set_xlabel('Number of partitions')
    ax1.set_xlim([min(parameters), max(parameters)])
    ax1.plot(parameters, errors)
    ret.append(best)
    ret.append(forecasted_best)

    # Differenced model
    print("\nDifferenced series")
    difffts = Transformations.differential(original)
    errors = []
    forecastedd_best = []
    ax2 = fig.add_axes([0, 0, 0.65, 0.45])  # left, bottom, width, height
    ax2.set_xlim([0, len(difffts)])
    ax2.set_ylim([min(difffts), max(difffts)])
    ax2.set_title('Time series')
    ax2.set_ylabel('F(T)')
    ax2.set_xlabel('T')
    ax2.plot(difffts, label="Original")
    min_rmse = 100000.0
    bestd = None

    for p in parameters:
        sets = Grid.GridPartitioner(data=difffts, npart=p).sets  # fixed: take the fuzzy sets, as in the loop above
        fts = modelo(str(p) + " partitions")
        fts.train(difffts, sets=sets)
        forecasted = fts.forecast(difffts)
        forecasted.insert(0, difffts[0])
        ax2.plot(forecasted, label=fts.name)
        error = Measures.rmse(np.array(forecasted), np.array(difffts))
        print(p, error)
        errors.append(error)
        if error < min_rmse:
            min_rmse = error
            bestd = fts
            forecastedd_best = forecasted

    handles0, labels0 = ax2.get_legend_handles_labels()
    ax2.legend(handles0, labels0)
    ax3 = fig.add_axes([0.7, 0, 0.3, 0.45])  # left, bottom, width, height
    ax3.set_title('RMSE comparison')
    ax3.set_ylabel('RMSE')
    ax3.set_xlabel('Number of partitions')
    ax3.set_xlim([min(parameters), max(parameters)])
    ax3.plot(parameters, errors)
    ret.append(bestd)
    ret.append(forecastedd_best)

    return ret
model = method(**parameters[ct])
model.fit(train)

start = model.order + 1
end = start + horizon

intervals = model.predict(test[:10], type='interval', alpha=.25, steps_ahead=horizon)
distributions = model.predict(test[:10], type='distribution', smooth='histogram',
                              steps_ahead=horizon, num_bins=100)

print(model.name, Measures.get_interval_ahead_statistics(test[start:end], intervals))
print(model.name, Measures.get_distribution_ahead_statistics(test[start:end], distributions))

print('end')

#f, ax = plt.subplots(1, 1, figsize=[20, 5])
#ax.plot(data)
#bchmk.plot_interval(ax, forecasts, 3, "")
#print(forecasts)

'''
mu_local = 5
sigma_local = 0.25