def cluster_method(individual, dataset, **kwargs):
    from pyFTS.common import Util, Membership
    from pyFTS.models import hofts
    from pyFTS.partitioners import Grid, Entropy
    from pyFTS.benchmarks import Measures
    import numpy as np

    if individual['mf'] == 1:
        mf = Membership.trimf
    elif individual['mf'] == 2:
        mf = Membership.trapmf
    elif individual['mf'] == 3 and individual['partitioner'] != 2:
        mf = Membership.gaussmf
    else:
        mf = Membership.trimf

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    parameters = kwargs.get('parameters', {})

    errors = []
    sizes = []

    for count, train, test in Util.sliding_window(dataset, window_size,
                                                  train=train_rate, inc=increment_rate):
        if individual['partitioner'] == 1:
            partitioner = Grid.GridPartitioner(data=train, npart=individual['npart'], func=mf)
        elif individual['partitioner'] == 2:
            npart = individual['npart'] if individual['npart'] > 10 else 10
            partitioner = Entropy.EntropyPartitioner(data=train, npart=npart, func=mf)

        model = hofts.WeightedHighOrderFTS(partitioner=partitioner,
                                           lags=individual['lags'],
                                           alpha_cut=individual['alpha'],
                                           order=individual['order'])
        model.fit(train)

        forecasts = model.predict(test)

        # rmse, mape, u = Measures.get_point_statistics(test, model)
        rmse = Measures.rmse(test[model.max_lag:], forecasts)
        size = len(model)

        errors.append(rmse)
        sizes.append(size)

    # average the per-window metrics (note: `sizes`, not the last `size`)
    return {'parameters': individual, 'rmse': np.nanmean(errors), 'size': np.nanmean(sizes)}
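# Hedged usage sketch for cluster_method(): the genotype keys mirror exactly the
# ones read by the function above; the concrete values and the TAIEX dataset
# choice are illustrative assumptions, not part of the original snippet.
from pyFTS.data import TAIEX

dataset = TAIEX.get_data()

individual = {
    'mf': 1,            # 1 = trimf, 2 = trapmf, 3 = gaussmf
    'partitioner': 1,   # 1 = Grid, 2 = Entropy
    'npart': 35,        # number of partitions of the Universe of Discourse
    'lags': [1, 2, 3],  # lag indexes for the high order model
    'alpha': 0.0,       # alpha-cut
    'order': 3,         # model order
}

result = cluster_method(individual, dataset, window_size=800, train_rate=.8, increment_rate=.2)
print(result['rmse'], result['size'])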
def evaluation1(dataset, individual):
    import numpy as np
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures

    try:
        results = []
        lengths = []

        for count, train, test in Util.sliding_window(dataset, 800, train=.8, inc=.25):
            # phenotype() is assumed to be defined at module level
            model = phenotype(individual, train)

            if model is None:
                return None

            rmse, _, _ = Measures.get_point_statistics(test, model)
            lengths.append(len(model))
            results.append(rmse)

        _lags = sum(model.lags) * 100

        rmse = np.nansum([.6 * np.nanmean(results), .4 * np.nanstd(results)])
        len_lags = np.nansum([.4 * np.nanmean(lengths), .6 * _lags])

        return len_lags, rmse

    except Exception as ex:
        print("EXCEPTION!", str(ex), str(individual))
        return np.inf
def evaluate(dataset, individual, **kwargs):
    """
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of the sliding window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the sliding window, relative to the window_size ([0,1])
    :param parameters: dict with model specific arguments for the fit method
    :return: a dict with the accuracy fitness value (f1), the parsimony fitness value (f2),
             and the mean RMSE and model size over the windows
    """
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.hyperparam.Evolutionary import phenotype, __measures
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
    parameters = kwargs.get('parameters', {})

    if individual['f1'] is not None and individual['f2'] is not None:
        return {key: individual[key] for key in __measures}

    errors = []
    lengths = []

    for count, train, test in Util.sliding_window(dataset, window_size,
                                                  train=train_rate, inc=increment_rate):
        model = phenotype(individual, train, fts_method=fts_method, parameters=parameters)

        forecasts = model.predict(test)

        rmse = Measures.rmse(test[model.max_lag:], forecasts[:-1])
        lengths.append(len(model))
        errors.append(rmse)

    _lags = sum(model.lags) * 100

    _rmse = np.nanmean(errors)
    _len = np.nanmean(lengths)

    f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)])
    f2 = np.nansum([.4 * _len, .6 * _lags])

    return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len}
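# A hedged sketch of calling evaluate() directly. Setting 'f1'/'f2' to None forces
# a fresh evaluation (see the early return above); the remaining keys follow the
# genotype format consumed by pyFTS.hyperparam.Evolutionary.phenotype, and the
# values are illustrative assumptions.
from pyFTS.data import TAIEX
from pyFTS.models import hofts

dataset = TAIEX.get_data()

individual = {'mf': 1, 'partitioner': 1, 'npart': 35, 'lags': [1, 2, 3],
              'alpha': 0.0, 'order': 3, 'f1': None, 'f2': None}

fitness = evaluate(dataset, individual, window_size=800, train_rate=.8,
                   increment_rate=.2, fts_method=hofts.WeightedHighOrderFTS)
print(fitness)  # {'f1': ..., 'f2': ..., 'rmse': ..., 'size': ...}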
def evaluate(dataset, individual, **kwargs):
    """
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of the sliding window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the sliding window, relative to the window_size ([0,1])
    :param parameters: dict with model specific arguments for the fit method
    :return: a dict with the accuracy fitness value (rmse)
    """
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.fcm.GA import phenotype
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    # parameters = kwargs.get('parameters', {})

    errors = []

    for count, train, test in Util.sliding_window(dataset, window_size,
                                                  train=train_rate, inc=increment_rate):
        model = phenotype(individual, train)

        if model is None:
            raise Exception("Phenotype returned None")

        model.uod_clip = False

        forecasts = model.predict(test)

        rmse = Measures.rmse(test[model.max_lag:], forecasts[:-1])  # .get_point_statistics(test, model)

        errors.append(rmse)

    _rmse = np.nanmean(errors)
    _std = np.nanstd(errors)

    # print("EVALUATION {}".format(individual))
    # the fitness combines mean accuracy with its stability across windows
    return {'rmse': .6 * _rmse + .4 * _std}
def sliding_window_simple_search(data, windowsize, model, partitions, orders, **kwargs):
    # assumes module-level imports: numpy as np, matplotlib.pyplot as plt,
    # mpl_toolkits.mplot3d.Axes3D, and pyFTS's Grid, Util and Measures
    _3d = len(orders) > 1
    ret = []
    # use a float array: the original integer dtype silently truncated the RMSE values
    errors = np.zeros((len(orders), len(partitions)))
    forecasted_best = []

    figsize = kwargs.get('figsize', [10, 15])
    fig = plt.figure(figsize=figsize)

    plotforecasts = kwargs.get('plotforecasts', False)
    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(data)])
        ax0.set_ylim([min(data) * 0.9, max(data) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')

    min_rmse = 1000000.0
    best = None

    intervals = kwargs.get('intervals', False)
    threshold = kwargs.get('threshold', 0.5)

    progressbar = kwargs.get('progressbar', None)

    rng1 = enumerate(partitions, start=0)

    if progressbar:
        from tqdm import tqdm
        rng1 = enumerate(tqdm(partitions), start=0)

    for pc, p in rng1:
        fs = Grid.GridPartitioner(data=data, npart=p)

        rng2 = enumerate(orders, start=0)

        if progressbar:
            rng2 = enumerate(tqdm(orders), start=0)

        for oc, o in rng2:
            _error = []
            for ct, train, test in Util.sliding_window(data, windowsize, 0.8, **kwargs):
                fts = model("q = " + str(p) + " n = " + str(o), partitioner=fs)
                fts.fit(train, order=o)
                if not intervals:
                    forecasted = fts.forecast(test)
                    if not fts.has_seasonality:
                        _error.append(Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1])))
                    else:
                        _error.append(Measures.rmse(np.array(test[o:]), np.array(forecasted)))
                    for kk in range(o):
                        forecasted.insert(0, None)
                    if plotforecasts:
                        ax0.plot(forecasted, label=fts.name)
                else:
                    forecasted = fts.forecast_interval(test)
                    _error.append(1.0 - Measures.rmse_interval(np.array(test[o:]),
                                                               np.array(forecasted[:-1])))
            error = np.nanmean(_error)
            errors[oc, pc] = error
            if (min_rmse - error) > threshold:
                min_rmse = error
                best = fts
                forecasted_best = forecasted

    # print(min_rmse)
    # define the camera angles before branching: the original only set them when
    # plotforecasts was True, raising a NameError otherwise
    elev = kwargs.get('elev', 30)
    azim = kwargs.get('azim', 144)

    if plotforecasts:
        # handles0, labels0 = ax0.get_legend_handles_labels()
        # ax0.legend(handles0, labels0)
        ax0.plot(test, label="Original", linewidth=3.0, color="black")

    if _3d:
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        surf = ax1.plot_surface(X, Y, errors, rstride=1, cstride=1, antialiased=True)
    else:
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_ylabel('Number of partitions')
        ax1.set_xlabel('RMSE')
        # plot on the error-curve axes (the original mistakenly reused ax0)
        ax1.plot(errors.flatten(), partitions)

    ret.append(best)
    ret.append(forecasted_best)

    # plt.tight_layout()

    file = kwargs.get('file', None)
    save = kwargs.get('save', False)

    Util.show_and_save_image(fig, file, save)

    return ret
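# Illustrative call of the grid search above, assuming `data` is a numeric series
# and that the chosen model class accepts the positional-name constructor used
# inside the loop (as the older hofts.HighOrderFTS API does in this codebase).
from pyFTS.models import hofts

best_model, best_forecasts = sliding_window_simple_search(
    data, 1000, hofts.HighOrderFTS,
    partitions=[10, 20, 30, 40],
    orders=[1, 2, 3],
    plotforecasts=False, save=False)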
def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
    """
    Sliding window benchmarks for FTS forecasters.

    For each data window, train and test datasets are split. For each train split,
    a partitioner is created for every combination of partitioning method and
    number of partitions. For each partitioner, order, steps-ahead horizon and FTS
    method, a forecasting model is trained.

    Then all trained models are benchmarked on the test data and the metrics are
    stored in a sqlite3 database (identified by the 'file' parameter) for later
    analysis.

    The whole process can be distributed over a dispy cluster by setting the
    attribute 'distributed' to True and passing the list of dispy nodes in the
    'nodes' parameter.

    The number of experiments is determined by the 'windowsize' and 'inc' parameters.

    :param data: test data
    :param windowsize: size of sliding window
    :param train: percentage of the sliding window data used to train the models
    :param kwargs: dict, optional arguments

    :keyword benchmark_methods: a list with non-FTS models to benchmark. The default is None.
    :keyword benchmark_methods_parameters: a list with non-FTS model parameters. The default is None.
    :keyword benchmark_models: A boolean value indicating whether external FTS methods will be used on the benchmark. The default is False.
    :keyword build_methods: A boolean value indicating whether the default FTS methods will be used on the benchmark. The default is True.
    :keyword dataset: the dataset name, to identify the current set of benchmark results on the database.
    :keyword distributed: A boolean value indicating whether the forecasting procedure will be distributed on a dispy cluster. The default is False.
    :keyword file: file path to save the results. The default is benchmarks.db.
    :keyword inc: a float on the interval [0,1] indicating the percentage of the windowsize to move the window.
    :keyword methods: a list with FTS class names. The default depends on the forecasting type and contains the list of all FTS methods.
    :keyword models: a list with prebuilt FTS objects. The default is None.
    :keyword nodes: a list with the dispy cluster node addresses. The default is ['127.0.0.1'].
    :keyword orders: a list with the orders of the models (for high order models). The default is [1,2,3].
    :keyword partitions: a list with the numbers of partitions of the Universe of Discourse. The default is [10].
    :keyword partitioners_models: a list with prebuilt Universe of Discourse partitioner objects. The default is None.
    :keyword partitioners_methods: a list with Universe of Discourse partitioner class names. The default is [partitioners.Grid.GridPartitioner].
    :keyword progress: if True, a progress bar will be displayed during the benchmarks. The default is False.
    :keyword start: in multi-step forecasting, the index of the data where forecasting starts. The default is 0.
    :keyword steps_ahead: a list with the forecasting horizons, i.e., the number of steps ahead to forecast. The default is [1].
    :keyword tag: a name to identify the current set of benchmark results on the database.
    :keyword type: the forecasting type, one of these values: point (default), interval or distribution.
    :keyword transformations: a list with data transformations to apply. The default is [None].
""" tag = __pop('tag', None, kwargs) dataset = __pop('dataset', None, kwargs) distributed = __pop('distributed', False, kwargs) transformations = kwargs.get('transformations', [None]) progress = kwargs.get('progress', None) type = kwargs.get("type", 'point') orders = __pop("orders", [1, 2, 3], kwargs) partitioners_models = __pop("partitioners_models", None, kwargs) partitioners_methods = __pop("partitioners_methods", [Grid.GridPartitioner], kwargs) partitions = __pop("partitions", [10], kwargs) steps_ahead = __pop('steps_ahead', [1], kwargs) methods = __pop('methods', None, kwargs) models = __pop('models', None, kwargs) pool = [] if models is None else models if methods is None: if type == 'point': methods = get_point_methods() elif type == 'interval': methods = get_interval_methods() elif type == 'distribution': methods = get_probabilistic_methods() build_methods = __pop("build_methods", True, kwargs) if build_methods: for method in methods: mfts = method() if mfts.is_high_order: for order in orders: if order >= mfts.min_order: mfts = method() mfts.order = order pool.append(mfts) else: mfts.order = 1 pool.append(mfts) benchmark_models = __pop("benchmark_models", False, kwargs) if benchmark_models != False: benchmark_methods = __pop("benchmark_methods", None, kwargs) benchmark_methods_parameters = __pop("benchmark_methods_parameters", None, kwargs) benchmark_pool = [] if ( benchmark_models is None or not isinstance(benchmark_models, list)) \ else benchmark_models if benchmark_models is None and benchmark_methods is None: if type == 'point' or type == 'partition': benchmark_methods = get_benchmark_point_methods() elif type == 'interval': benchmark_methods = get_benchmark_interval_methods() elif type == 'distribution': benchmark_methods = get_benchmark_probabilistic_methods() if benchmark_methods is not None: for transformation in transformations: for count, model in enumerate(benchmark_methods, start=0): par = benchmark_methods_parameters[count] mfts = model(**par) mfts.append_transformation(transformation) benchmark_pool.append(mfts) if type == 'point': experiment_method = run_point synthesis_method = process_point_jobs elif type == 'interval': experiment_method = run_interval synthesis_method = process_interval_jobs elif type == 'distribution': experiment_method = run_probabilistic synthesis_method = process_probabilistic_jobs else: raise ValueError("Type parameter has a unkown value!") if distributed: import dispy, dispy.httpd nodes = kwargs.get("nodes", ['127.0.0.1']) cluster, http_server = cUtil.start_dispy_cluster( experiment_method, nodes) jobs = [] inc = __pop("inc", 0.1, kwargs) if progress: from tqdm import tqdm _tdata = len(data) / (windowsize * inc) _tasks = (len(partitioners_models) * len(orders) * len(partitions) * len(transformations) * len(steps_ahead)) _tbcmk = len(benchmark_pool) * len(steps_ahead) progressbar = tqdm(total=_tdata * _tasks + _tdata * _tbcmk, desc="Benchmarks:") file = kwargs.get('file', "benchmarks.db") conn = bUtil.open_benchmark_db(file) for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs): if benchmark_models != False: for model in benchmark_pool: for step in steps_ahead: kwargs['steps_ahead'] = step if not distributed: if progress: progressbar.update(1) try: job = experiment_method(deepcopy(model), None, train, test, **kwargs) synthesis_method(dataset, tag, job, conn) except Exception as ex: print('EXCEPTION! 
', model.shortname, model.order) traceback.print_exc() else: job = cluster.submit(deepcopy(model), None, train, test, **kwargs) jobs.append(job) partitioners_pool = [] if partitioners_models is None: for transformation in transformations: for partition in partitions: for partitioner in partitioners_methods: data_train_fs = partitioner( data=train, npart=partition, transformation=transformation) partitioners_pool.append(data_train_fs) else: partitioners_pool = partitioners_models for step in steps_ahead: for partitioner in partitioners_pool: for _id, model in enumerate(pool, start=0): kwargs['steps_ahead'] = step if not distributed: if progress: progressbar.update(1) try: job = experiment_method(deepcopy(model), deepcopy(partitioner), train, test, **kwargs) synthesis_method(dataset, tag, job, conn) except Exception as ex: print('EXCEPTION! ', model.shortname, model.order, partitioner.name, partitioner.partitions, str(partitioner.transformation)) traceback.print_exc() else: job = cluster.submit(deepcopy(model), deepcopy(partitioner), train, test, **kwargs) job.id = id # associate an ID to identify jobs (if needed later) jobs.append(job) if progress: progressbar.close() if distributed: for job in jobs: if progress: progressbar.update(1) job() if job.status == dispy.DispyJob.Finished and job is not None: tmp = job.result synthesis_method(dataset, tag, tmp, conn) else: print("status", job.status) print("result", job.result) print("stdout", job.stdout) print("stderr", job.exception) cluster.wait() # wait for all jobs to finish cUtil.stop_dispy_cluster(cluster, http_server) conn.close()
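# A typical invocation of sliding_window_benchmarks(), following the parameters
# documented above; the TAIEX dataset and the chosen methods are illustrative.
from pyFTS.data import TAIEX
from pyFTS.models import chen, hofts
from pyFTS.partitioners import Grid

data = TAIEX.get_data()

sliding_window_benchmarks(data, 1000, train=0.8, inc=0.2,
                          methods=[chen.ConventionalFTS, hofts.HighOrderFTS],
                          orders=[1, 2, 3],
                          partitions=[10, 20, 30],
                          partitioners_methods=[Grid.GridPartitioner],
                          type='point',
                          steps_ahead=[1],
                          distributed=False,
                          tag='example',
                          dataset='TAIEX',
                          file='benchmarks.db')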
def evaluate(dataset, individual, **kwargs):
    """
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of the sliding window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the sliding window, relative to the window_size ([0,1])
    :param parameters: dict with model specific arguments for the fit method
    :return: a dict with the accuracy fitness value (f1), the parsimony fitness value (f2),
             and the mean RMSE and model size over the windows
    """
    import logging
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.hyperparam.Evolutionary import __measures
    from pyFTS.hyperparam.mvfts import phenotype
    from pyFTS.models.multivariate import mvfts, wmvfts, partitioner, variable, cmvfts, \
        grid, granular, common
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    fts_method = kwargs.get('fts_method', wmvfts.WeightedMVFTS)
    parameters = kwargs.get('parameters', {})
    tvar = kwargs.get('target_variable', None)

    if individual['f1'] is not None and individual['f2'] is not None:
        return {key: individual[key] for key in __measures}

    errors = []
    lengths = []

    kwargs2 = kwargs.copy()
    # pop with a default: these keys are optional and may be absent from kwargs
    kwargs2.pop('fts_method', None)
    kwargs2.pop('parameters', None)

    for count, train, test in Util.sliding_window(dataset, window_size,
                                                  train=train_rate, inc=increment_rate):
        try:
            model = phenotype(individual, train, fts_method=fts_method,
                              parameters=parameters, **kwargs2)

            forecasts = model.predict(test)

            rmse = Measures.rmse(test[tvar['data_label']].values[model.max_lag:], forecasts[:-1])
            lengths.append(len(model))

            errors.append(rmse)

        except Exception as ex:
            logging.exception("Error")
            lengths.append(np.nan)
            errors.append(np.nan)

    try:
        _rmse = np.nanmean(errors)
        _len = np.nanmean(lengths)

        f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)])
        f2 = np.nansum([.9 * _len, .1 * np.nanstd(lengths)])

        return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len}
    except Exception as ex:
        logging.exception("Error")
        return {'f1': np.inf, 'f2': np.inf, 'rmse': np.inf, 'size': np.inf}
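# Minimal multivariate sketch: evaluate() above reads only the 'data_label' key of
# target_variable. `df` (a pandas DataFrame with a 'load' column) and `individual`
# (a genotype accepted by pyFTS.hyperparam.mvfts.phenotype) are assumptions of
# this example, not defined by the original snippet.
tvar = {'data_label': 'load'}

fitness = evaluate(df, individual, target_variable=tvar,
                   window_size=8000, train_rate=.8, increment_rate=.2)
print(fitness['f1'], fitness['f2'])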
def ahead_sliding_window(data, windowsize, steps, resolution, train=0.8, inc=0.1,
                         models=None, partitioners=[Grid.GridPartitioner],
                         partitions=[10], max_order=3, transformation=None, indexer=None,
                         dump=False, benchmark_models=None, benchmark_models_parameters=None,
                         save=False, file=None, synthetic=False, nodes=None):
    """
    Distributed sliding window benchmarks for FTS probabilistic forecasters

    :param data: time series data to benchmark
    :param windowsize: size of sliding window
    :param train: percentage of the sliding window data used to train the models
    :param steps: number of steps ahead to forecast
    :param resolution: resolution of the probability distribution grid
    :param models: FTS point forecasters
    :param partitioners: Universe of Discourse partitioner
    :param partitions: the max number of partitions on the Universe of Discourse
    :param max_order: the max order of the models (for high order models)
    :param transformation: data transformation
    :param indexer: seasonal indexer
    :param dump: if True, print debug information for each window
    :param save: save results
    :param file: file path to save the results
    :param synthetic: if True, save only the average and standard deviation of the results
    :param nodes: list of cluster nodes to distribute tasks
    :return: DataFrame with the results
    """
    alphas = [0.05, 0.25]

    if benchmark_models is None and models is None:
        benchmark_models = [arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA]

    if benchmark_models_parameters is None:
        benchmark_models_parameters = [(1, 0, 0), (1, 0, 1), (2, 0, 0), (2, 0, 1), (2, 0, 2)]

    cluster = dispy.JobCluster(benchmarks.run_ahead, nodes=nodes)  # , depends=dependencies)
    http_server = dispy.httpd.DispyHTTPServer(cluster)

    _process_start = time.time()

    print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now()))

    pool = []
    jobs = []
    objs = {}
    crps_interval = {}
    crps_distr = {}
    times1 = {}
    times2 = {}

    if models is None:
        models = benchmarks.get_probabilistic_methods()

    for model in models:
        mfts = model("")

        if mfts.is_high_order:
            for order in np.arange(1, max_order + 1):
                if order >= mfts.min_order:
                    mfts = model("")
                    mfts.order = order
                    pool.append(mfts)
        else:
            pool.append(mfts)

    if benchmark_models is not None:
        for count, model in enumerate(benchmark_models, start=0):
            for a in alphas:
                par = benchmark_models_parameters[count]
                mfts = model(str(par if par is not None else ""), alpha=a, dist=True)
                mfts.order = par
                pool.append(mfts)

    experiments = 0
    for ct, train, test in Util.sliding_window(data, windowsize, train, inc=inc):
        experiments += 1

        benchmarks_only = {}

        if dump:
            print('\nWindow: {0}\n'.format(ct))

        for partition in partitions:

            for partitioner in partitioners:

                data_train_fs = partitioner(train, partition, transformation=transformation)

                for _id, m in enumerate(pool, start=0):
                    if m.benchmark_only and m.shortname in benchmarks_only:
                        continue
                    else:
                        benchmarks_only[m.shortname] = m
                        job = cluster.submit(m, data_train_fs, train, test, steps, resolution,
                                             ct, transformation, indexer)
                        job.id = _id  # associate an ID to identify jobs (if needed later)
                        jobs.append(job)

    for job in jobs:
        tmp = job()
        if job.status == dispy.DispyJob.Finished and tmp is not None:
            if tmp['key'] not in objs:
                objs[tmp['key']] = tmp['obj']
                crps_interval[tmp['key']] = []
                crps_distr[tmp['key']] = []
                times1[tmp['key']] = []
                times2[tmp['key']] = []

            # plain list appends (the original called a non-existent list method append_rhs)
            crps_interval[tmp['key']].append(tmp['CRPS_Interval'])
            crps_distr[tmp['key']].append(tmp['CRPS_Distribution'])
            times1[tmp['key']].append(tmp['TIME_Interval'])
            times2[tmp['key']].append(tmp['TIME_Distribution'])
        else:
            print(job.exception)
            print(job.stdout)

    _process_end = time.time()

    print("Process End: {0: %H:%M:%S}".format(datetime.datetime.now()))

    print("Process Duration: {0}".format(_process_end - _process_start))

    cluster.wait()  # wait for all jobs to finish

    cluster.print_status()

    http_server.shutdown()  # this waits until browser gets all updates
    cluster.close()

    return bUtil.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr,
                                      times1, times2, save, synthetic)
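# Illustrative call of the distributed probabilistic benchmark above; `steps` and
# `resolution` control the multi-step horizon and the distribution grid, and the
# node address and file name are placeholders.
dataframe = ahead_sliding_window(data, 1000, steps=10, resolution=100,
                                 train=0.8, inc=0.1,
                                 partitioners=[Grid.GridPartitioner],
                                 partitions=[10], max_order=3,
                                 save=True, file='ahead_results.csv',
                                 nodes=['127.0.0.1'])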
def point_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None,
                         partitioners=[Grid.GridPartitioner], partitions=[10],
                         max_order=3, transformation=None, indexer=None, dump=False,
                         benchmark_models=None, benchmark_models_parameters=None,
                         save=False, file=None, sintetic=False, nodes=None, depends=None):
    """
    Distributed sliding window benchmarks for FTS point forecasters

    :param data: time series data to benchmark
    :param windowsize: size of sliding window
    :param train: percentage of the sliding window data used to train the models
    :param inc: percentage of the window size used as increment
    :param models: FTS point forecasters
    :param partitioners: Universe of Discourse partitioner
    :param partitions: the max number of partitions on the Universe of Discourse
    :param max_order: the max order of the models (for high order models)
    :param transformation: data transformation
    :param indexer: seasonal indexer
    :param dump: if True, print debug information for each window
    :param benchmark_models: non-FTS models to benchmark
    :param benchmark_models_parameters: non-FTS model parameters
    :param save: save results
    :param file: file path to save the results
    :param sintetic: if True, save only the average and standard deviation of the results
    :param nodes: list of cluster nodes to distribute tasks
    :param depends: list of module dependencies
    :return: DataFrame with the results
    """
    cluster = dispy.JobCluster(benchmarks.run_point, nodes=nodes)  # , depends=dependencies)
    http_server = dispy.httpd.DispyHTTPServer(cluster)

    _process_start = time.time()

    print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now()))

    jobs = []
    objs = {}
    rmse = {}
    smape = {}
    u = {}
    times = {}

    pool = build_model_pool_point(models, max_order, benchmark_models,
                                  benchmark_models_parameters)

    experiments = 0
    for ct, train, test in Util.sliding_window(data, windowsize, train, inc):
        experiments += 1

        benchmarks_only = {}

        if dump:
            print('\nWindow: {0}\n'.format(ct))

        for partition in partitions:

            for partitioner in partitioners:

                data_train_fs = partitioner(train, partition, transformation=transformation)

                for _id, m in enumerate(pool, start=0):
                    if m.benchmark_only and m.shortname in benchmarks_only:
                        continue
                    else:
                        benchmarks_only[m.shortname] = m
                        job = cluster.submit(m, data_train_fs, train, test, ct, transformation)
                        job.id = _id  # associate an ID to identify jobs (if needed later)
                        jobs.append(job)

    for job in jobs:
        tmp = job()
        if job.status == dispy.DispyJob.Finished and tmp is not None:
            if tmp['key'] not in objs:
                objs[tmp['key']] = tmp['obj']
                rmse[tmp['key']] = []
                smape[tmp['key']] = []
                u[tmp['key']] = []
                times[tmp['key']] = []

            # plain list appends (the original called a non-existent list method append_rhs)
            rmse[tmp['key']].append(tmp['rmse'])
            smape[tmp['key']].append(tmp['smape'])
            u[tmp['key']].append(tmp['u'])
            times[tmp['key']].append(tmp['time'])
            print(tmp['key'], tmp['window'])
        else:
            print(job.exception)
            print(job.stdout)

    _process_end = time.time()

    print("Process End: {0: %H:%M:%S}".format(datetime.datetime.now()))

    print("Process Duration: {0}".format(_process_end - _process_start))

    cluster.wait()  # wait for all jobs to finish

    cluster.print_status()

    http_server.shutdown()  # this waits until browser gets all updates
    cluster.close()

    return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic,
                                      smape, times, u)
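# Illustrative call of the distributed point benchmark above; the node address
# and file name are placeholders, and `data` is assumed to be a numeric series.
dataframe = point_sliding_window(data, 1000, train=0.8, inc=0.1,
                                 partitioners=[Grid.GridPartitioner],
                                 partitions=[10, 20], max_order=3,
                                 save=True, file='point_results.csv',
                                 nodes=['127.0.0.1'])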
    BSTS.ARIMA
]

parameters = [
    {},
    {'order': (2, 0, 0)},
    {'order': 1, 'dist': True},
    {'order': (2, 0, 0)}
]

from pyFTS.benchmarks import Measures

horizon = 5

for ct, train, test in Util.sliding_window(data, 1000, 0.8, .5):
    print('data window {}'.format(ct))
    for ct, method in enumerate(methods):
        model = method(**parameters[ct])
        model.fit(train)

        start = model.order + 1
        end = start + horizon

        intervals = model.predict(test[:10], type='interval', alpha=.25,
                                  steps_ahead=horizon)
        distributions = model.predict(test[:10], type='distribution', smooth='histogram',
                                      steps_ahead=horizon, num_bins=100)
# fragment: assumes a dispy cluster created earlier, plus the nonstationary
# imports (nsfts, honsfts, partitioners) and numpy/deepcopy at module level
jobs = []

models = []

for order in [1, 2, 3]:
    if order == 1:
        model = nsfts.NonStationaryFTS("")
    else:
        model = honsfts.HighOrderNonStationaryFTS("")
    model.order = order
    models.append(model)

for ct, train, test in cUtil.sliding_window(data, 300):
    for partition in np.arange(5, 100, 1):
        tmp_partitioner = Grid.GridPartitioner(train, partition)
        partitioner = partitioners.PolynomialNonStationaryPartitioner(train, tmp_partitioner,
                                                                      window_size=35, degree=1)

        for model in models:
            # print(model.shortname, partition, model.order)
            # job = evaluate_individual_model(model, train, test)
            job = cluster.submit(deepcopy(model), deepcopy(partitioner), train, test, 35, 240)
            job.id = ct + model.order * 100
            jobs.append(job)

results = {}

for job in jobs:
    tmp = job()
from pyFTS.common import Transformations, Util as cUtil
# imports the fragment below also needs
from pyFTS.benchmarks import naive, Measures

diff = Transformations.Differential(lag=1)

train = dataset[:1000]
test = dataset[1000:]

# grid = Grid.GridPartitioner(data=train, transformation=diff)
# model = pwfts.ProbabilisticWeightedFTS(partitioner=grid)
# model.append_transformation(diff)

model = naive.Naive()
model.fit(train)

for ct, ttrain, ttest in cUtil.sliding_window(test, 1000, .95, inc=.5):
    if model.shortname not in ('PWFTS', 'Naive'):
        model.predict(ttrain)
    print(ttest)
    if len(ttest) > 0:
        forecasts = model.predict(ttest, steps_ahead=10)
        measures = Measures.get_point_ahead_statistics(ttest[1:11], forecasts)
        print(measures)

'''
from pyFTS.models.nonstationary import partitioners as nspart, nsfts, honsfts

fs = nspart.simplenonstationary_gridpartitioner_builder(data=train, npart=35,
                                                        transformation=None)
print(fs)

# model = honsfts.HighOrderNonStationaryFTS(partitioner=fs, order=2)
model = nsfts.WeightedNonStationaryFTS(partitioner=fs)
model.fit(train)
def point_sliding_window(data, windowsize, train=0.8, models=None,
                         partitioners=[Grid.GridPartitioner], partitions=[10],
                         max_order=3, transformation=None, indexer=None, dump=False,
                         save=False, file=None, sintetic=False):
    """
    Parallel sliding window benchmarks for FTS point forecasters

    :param data: time series data to benchmark
    :param windowsize: size of sliding window
    :param train: percentage of the sliding window data used to train the models
    :param models: FTS point forecasters
    :param partitioners: Universe of Discourse partitioner
    :param partitions: the max number of partitions on the Universe of Discourse
    :param max_order: the max order of the models (for high order models)
    :param transformation: data transformation
    :param indexer: seasonal indexer
    :param dump: if True, print debug information for each window
    :param save: save results
    :param file: file path to save the results
    :param sintetic: if True, save only the average and standard deviation of the results
    :return: DataFrame with the results
    """
    # assumes module-level imports: time, datetime, multiprocessing, numpy as np,
    # copy.deepcopy, and joblib's Parallel and delayed
    _process_start = time.time()

    print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now()))

    num_cores = multiprocessing.cpu_count()

    pool = []
    objs = {}
    rmse = {}
    smape = {}
    u = {}
    times = {}

    for model in benchmarks.get_point_methods():
        mfts = model("")

        if mfts.is_high_order:
            for order in np.arange(1, max_order + 1):
                if order >= mfts.min_order:
                    mfts = model("")
                    mfts.order = order
                    pool.append(mfts)
        else:
            pool.append(mfts)

    experiments = 0
    for ct, train, test in Util.sliding_window(data, windowsize, train):
        experiments += 1

        if dump:
            print('\nWindow: {0}\n'.format(ct))

        for partition in partitions:

            for partitioner in partitioners:

                data_train_fs = partitioner(train, partition, transformation=transformation)

                results = Parallel(n_jobs=num_cores)(
                    delayed(run_point)(deepcopy(m), deepcopy(data_train_fs), deepcopy(train),
                                       deepcopy(test), transformation)
                    for m in pool)

                for tmp in results:
                    if tmp['key'] not in objs:
                        objs[tmp['key']] = tmp['obj']
                        rmse[tmp['key']] = []
                        smape[tmp['key']] = []
                        u[tmp['key']] = []
                        times[tmp['key']] = []

                    # plain list appends (the original called a non-existent list method append_rhs)
                    rmse[tmp['key']].append(tmp['rmse'])
                    smape[tmp['key']].append(tmp['smape'])
                    u[tmp['key']].append(tmp['u'])
                    times[tmp['key']].append(tmp['time'])

    _process_end = time.time()

    print("Process End: {0: %H:%M:%S}".format(datetime.datetime.now()))

    print("Process Duration: {0}".format(_process_end - _process_start))

    return Util.save_dataframe_point(experiments, file, objs, rmse, save, sintetic,
                                     smape, times, u)
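# Illustrative call of the parallel (joblib) point benchmark above; unlike the
# dispy variant it takes no `nodes` argument, and the file name is a placeholder.
dataframe = point_sliding_window(data, 1000, train=0.8,
                                 partitioners=[Grid.GridPartitioner],
                                 partitions=[10, 20, 30], max_order=3,
                                 save=True, file='parallel_results.csv')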
vload = variable.Variable("Load", data_label="load", alias='load',
                          partitioner=Grid.GridPartitioner, npart=20,
                          func=Membership.gaussmf, data=train_mv, alpha_cut=.3)

rows = []

time_generator = lambda x: pd.to_datetime(x) + pd.to_timedelta(1, unit='h')

for ct, train, test in cUtil.sliding_window(df, windowsize=32000, train=.98, inc=.05):
    print('Window {}'.format(ct))
    for order in [1, 2, 3]:
        for knn in [1, 2, 3]:
            model = granular.GranularWMVFTS(explanatory_variables=[vhour, vtemp, vload],
                                            target_variable=vload,
                                            order=order, knn=knn)

            model.fit(train)

            forecasts1 = model.predict(test, type='multivariate')
            forecasts2 = model.predict(test, type='multivariate',