예제 #1
0
def print_point_statistics(data,
                           models,
                           externalmodels=None,
                           externalforecasts=None,
                           indexers=None):
    """
    Print a LaTeX-style table with point forecasting statistics (RMSE, SMAPE,
    Theil's U) for each model, plus rows for optional external benchmarks.

    :param data: test data
    :param models: list of fitted FTS models to evaluate
    :param externalmodels: optional list of external benchmark model names
    :param externalforecasts: forecasts of the external models, aligned with
        `externalmodels`
    :param indexers: optional seasonal indexers forwarded to
        Measures.get_point_statistics
    :return: None (the table is printed to stdout)
    """
    parts = ["Model\t\t& Order     & RMSE\t\t& SMAPE      & Theil's U\t\t\\\\ \n"]
    row_fmt = "{}\t\t& {}\t\t& {}\t\t& {}\t\t& {}\t\\\\ \n"

    for model in models:
        rmse, smape, u = Measures.get_point_statistics(data, model, indexers)
        parts.append(row_fmt.format(model.shortname, model.order, rmse, smape, u))

    if externalmodels is not None:
        for name, forecasts in zip(externalmodels, externalforecasts):
            parts.append(row_fmt.format(
                name, ' 1',
                round(Measures.rmse(data, forecasts[:-1]), 2),
                round(Measures.smape(data, forecasts[:-1]), 2),
                round(Measures.UStatistic(data, forecasts[:-1]), 2)))

    print("".join(parts))
예제 #2
0
def forecast_params(data, train_split, method, params, plot=False):
    """
    Forecast with `method` on a train/test split and report accuracy metrics.

    :param data: full dataset to split
    :param train_split: split specification forwarded to sampling.train_test_split
    :param method: forecasting callable with signature (train, test, params)
    :param params: dict with at least 'output' and 'order'; optional 'step'
    :param plot: when True, plot observed vs forecast values
    :return: tuple (rmse, nrmse, smape, u)
    """
    train, test = sampling.train_test_split(data, train_split)
    fcst = method(train, test, params)

    # The first `order + step - 1` observations have no forecast and are skipped.
    step = params.get('step', 1)
    offset = params['order'] + step - 1
    yobs = test[params['output']].iloc[offset:].values

    if plot:
        plt.figure(figsize=(20, 10))
        plt.plot(yobs)
        plt.plot(fcst)
        plt.show()

    scores = (
        ("RMSE: ", Measures.rmse(yobs, fcst)),
        ("nRMSE: ", metrics.normalized_rmse(yobs, fcst)),
        ("SMAPE: ", Measures.smape(yobs, fcst)),
        ("U Statistic: ", Measures.UStatistic(yobs, fcst)),
    )
    for label, value in scores:
        print(label, value)

    return tuple(value for _, value in scores)
예제 #3
0
파일: benchmarks.py 프로젝트: bekti7/pmi
def print_point_statistics(data, models, externalmodels = None, externalforecasts = None, indexers=None):
    """
    Run point benchmarks on given models and data and print the results

    :param data: test data
    :param models: a list of FTS models to benchmark
    :param externalmodels: a list with benchmark models (façades for other methods)
    :param externalforecasts: forecasts produced by the external models, aligned
        with `externalmodels`
    :param indexers: seasonal indexers forwarded to Measures.get_point_statistics
    :return: None (the table is printed to stdout)
    """
    table = "Model\t\t& Order     & RMSE\t\t& SMAPE      & Theil's U\t\t\\\\ \n"

    for model in models:
        rmse, smape, u = Measures.get_point_statistics(data, model, indexers)
        table += f"{model.shortname}\t\t& {model.order}\t\t& {rmse}\t\t& {smape}\t\t& {u}\t\\\\ \n"

    if externalmodels is not None:
        for name, fcst in zip(externalmodels, externalforecasts):
            rmse = round(Measures.rmse(data, fcst[:-1]), 2)
            smape = round(Measures.smape(data, fcst[:-1]), 2)
            u = round(Measures.UStatistic(data, fcst[:-1]), 2)
            table += f"{name}\t\t&  1\t\t& {rmse}\t\t& {smape}\t\t& {u}\t\\\\ \n"

    print(table)
예제 #4
0
def compareModelsTable(original, models_fo, models_ho):
    """
    Render a model-comparison table (as a matplotlib table figure) and return
    the equivalent LaTeX ``tabular`` source.

    :param original: reference series used to score the forecasts
    :param models_fo: first-order models; dicts with 'model', 'name', 'forecasted'
    :param models_ho: high-order models (same dict shape); their forecasts and the
        reference series are aligned by skipping the first `order` samples
    :return: LaTeX tabular source (str)
    """
    fig = plt.figure(figsize=[12, 4])
    fig.suptitle("Comparação de modelos ")
    columns = ['Modelo', 'Ordem', 'Partições', 'RMSE', 'MAPE (%)']
    rows = []
    for model in models_fo:
        fts = model["model"]
        error_r = Measures.rmse(model["forecasted"], original)
        error_m = round(Measures.mape(model["forecasted"], original) * 100, 2)
        rows.append(
            [model["name"], fts.order,
             len(fts.sets), error_r, error_m])
    for model in models_ho:
        fts = model["model"]
        # High-order models have no forecast for the first `order` points.
        error_r = Measures.rmse(model["forecasted"][fts.order:],
                                original[fts.order:])
        error_m = round(
            Measures.mape(model["forecasted"][fts.order:],
                          original[fts.order:]) * 100, 2)
        rows.append(
            [model["name"], fts.order,
             len(fts.sets), error_r, error_m])
    ax1 = fig.add_axes([0, 0, 1, 1])  # left, bottom, width, height
    ax1.set_xticks([])
    ax1.set_yticks([])
    ax1.table(cellText=rows,
              colLabels=columns,
              cellLoc='center',
              bbox=[0, 0, 1, 1])

    # Build the LaTeX source with joins instead of repeated concatenation;
    # an unused `footer` local was removed. Output text is unchanged.
    sup = "\\begin{tabular}{" + "|c" * len(columns) + "|} \\hline\n"
    header = " & ".join("\\textbf{" + c + "} " for c in columns) + "\\\\ \\hline \n"
    body = "".join(
        " & ".join(str(c) for c in r) + "\\\\ \\hline \n" for r in rows)

    return sup + header + body + "\\end{tabular}"
예제 #5
0
def cluster_method(individual, train, test):
    """
    Evaluate a genotype: build a partitioner and a WeightedHighOrderFTS model
    from `individual`, fit on `train` and score on `test`.

    :param individual: dict genotype with keys 'mf', 'partitioner', 'npart',
        'lags', 'alpha', 'order'
    :param train: training series
    :param test: test series
    :return: tuple (individual, rmse, size, mape, u)
    :raises ValueError: if individual['partitioner'] is not 1 or 2
    """
    from pyFTS.common import Util, Membership
    from pyFTS.models import hofts
    from pyFTS.partitioners import Grid, Entropy
    from pyFTS.benchmarks import Measures

    # Membership function selection; gaussmf is only allowed when the
    # partitioner is not Entropy (code 2), otherwise fall back to trimf.
    if individual['mf'] == 1:
        mf = Membership.trimf
    elif individual['mf'] == 2:
        mf = Membership.trapmf
    elif individual['mf'] == 3 and individual['partitioner'] != 2:
        mf = Membership.gaussmf
    else:
        mf = Membership.trimf

    if individual['partitioner'] == 1:
        partitioner = Grid.GridPartitioner(data=train, npart=individual['npart'], func=mf)
    elif individual['partitioner'] == 2:
        # Entropy partitioning requires at least 10 partitions.
        npart = individual['npart'] if individual['npart'] > 10 else 10
        partitioner = Entropy.EntropyPartitioner(data=train, npart=npart, func=mf)
    else:
        # Previously this fell through and crashed later with an obscure
        # NameError on `partitioner`; fail fast with a clear message instead.
        raise ValueError(
            "Unknown partitioner code: {}".format(individual['partitioner']))

    model = hofts.WeightedHighOrderFTS(partitioner=partitioner,
                               lags=individual['lags'],
                               alpha_cut=individual['alpha'],
                               order=individual['order'])

    model.fit(train)

    rmse, mape, u = Measures.get_point_statistics(test, model)

    size = len(model)

    return individual, rmse, size, mape, u
예제 #6
0
def cluster_method(individual, dataset, **kwargs):
    """
    Evaluate a genotype with sliding-window cross validation: for each window,
    build a partitioner and a WeightedHighOrderFTS from `individual`, fit on
    the training slice and measure RMSE on the test slice.

    :param individual: dict genotype with keys 'mf', 'partitioner', 'npart',
        'lags', 'alpha', 'order'
    :param dataset: full series for the sliding-window evaluation
    :param window_size: sliding window length (kwargs, default 800)
    :param train_rate: train/test split inside each window (kwargs, default .8)
    :param increment_rate: window increment relative to window_size (kwargs, default .2)
    :return: dict with 'parameters', mean 'rmse' and mean 'size' over all windows
    :raises ValueError: if individual['partitioner'] is not 1 or 2
    """
    from pyFTS.common import Util, Membership
    from pyFTS.models import hofts
    from pyFTS.partitioners import Grid, Entropy
    from pyFTS.benchmarks import Measures
    import numpy as np

    # gaussmf is only allowed when the partitioner is not Entropy (code 2).
    if individual['mf'] == 1:
        mf = Membership.trimf
    elif individual['mf'] == 2:
        mf = Membership.trapmf
    elif individual['mf'] == 3 and individual['partitioner'] != 2:
        mf = Membership.gaussmf
    else:
        mf = Membership.trimf

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)

    errors = []
    sizes = []

    for count, train, test in Util.sliding_window(dataset,
                                                  window_size,
                                                  train=train_rate,
                                                  inc=increment_rate):

        if individual['partitioner'] == 1:
            partitioner = Grid.GridPartitioner(data=train,
                                               npart=individual['npart'],
                                               func=mf)
        elif individual['partitioner'] == 2:
            # Entropy partitioning requires at least 10 partitions.
            npart = individual['npart'] if individual['npart'] > 10 else 10
            partitioner = Entropy.EntropyPartitioner(data=train,
                                                     npart=npart,
                                                     func=mf)
        else:
            # Fail fast instead of crashing later with NameError on `partitioner`.
            raise ValueError(
                "Unknown partitioner code: {}".format(individual['partitioner']))

        model = hofts.WeightedHighOrderFTS(partitioner=partitioner,
                                           lags=individual['lags'],
                                           alpha_cut=individual['alpha'],
                                           order=individual['order'])
        model.fit(train)

        forecasts = model.predict(test)

        rmse = Measures.rmse(test[model.max_lag:], forecasts)

        errors.append(rmse)
        sizes.append(len(model))

    return {
        'parameters': individual,
        'rmse': np.nanmean(errors),
        # BUG FIX: previously np.nanmean(size) averaged only the LAST window's
        # scalar size; average the list of per-window sizes instead.
        'size': np.nanmean(sizes)
    }
예제 #7
0
def evaluation1(dataset, individual):
    """
    Sliding-window fitness evaluation of an individual.

    :param dataset: series used for the sliding-window cross validation
    :param individual: genotype to be evaluated (decoded by `phenotype`)
    :return: tuple (len_lags, rmse) combining parsimony and accuracy fitness;
        None when `phenotype` fails to build a model; np.inf on exception
        (NOTE(review): the np.inf scalar is inconsistent with the tuple
        success value — callers must handle both shapes).
    """
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures

    try:
        results = []
        lengths = []

        for count, train, test in Util.sliding_window(dataset,
                                                      800,
                                                      train=.8,
                                                      inc=.25):
            model = phenotype(individual, train)

            if model is None:
                return None

            rmse, _, _ = Measures.get_point_statistics(test, model)
            lengths.append(len(model))
            results.append(rmse)

        # Lag penalty, taken from the last trained model (as before).
        _lags = sum(model.lags) * 100

        # Hoisted out of the loop: these aggregates depend only on the complete
        # results/lengths lists and were previously recomputed every window.
        rmse = np.nansum(
            [.6 * np.nanmean(results), .4 * np.nanstd(results)])
        len_lags = np.nansum([.4 * np.nanmean(lengths), .6 * _lags])

        return len_lags, rmse

    except Exception as ex:
        print("EXCEPTION!", str(ex), str(individual))
        return np.inf
예제 #8
0
def forecast_best_params(data,
                         train_split,
                         method_id,
                         method,
                         space,
                         plot=False,
                         save=False):
    """
    Forecast using the best hyperparameters previously found for `method_id`.

    Loads the stored best point from ``best_<method_id>.pkl``, decodes it with
    ``space_eval``, runs `method` on the train/test split and prints RMSE,
    nRMSE, SMAPE and Theil's U.

    :param data: full dataset
    :param train_split: split specification for sampling.train_test_split
    :param method_id: experiment identifier used in the pickle file names
    :param method: forecasting callable with signature (train, test, params)
    :param space: hyperopt search space used to decode the stored best point
    :param plot: when True, plot observed vs forecast values
    :param save: when True, pickle the results to ``results_<method_id>.pkl``
    :return: tuple (rmse, nrmse, smape, u)
    """
    print("Running experiment ", method_id)

    # Context manager ensures the file handle is closed (the original
    # pickle.load(open(...)) leaked the handle).
    with open("best_" + method_id + ".pkl", "rb") as handle:
        best = pickle.load(handle)

    train, test = sampling.train_test_split(data, train_split)
    best_params = space_eval(space, best)
    fcst = method(train, test, best_params)
    _order = best_params['order']
    _output = best_params['output']
    # The first `order` observations have no forecast and are skipped.
    yobs = test[_output].iloc[_order:].values

    if plot:
        plt.figure(figsize=(20, 10))
        plt.plot(yobs)
        plt.plot(fcst)
        plt.show()

    rmse = Measures.rmse(yobs, fcst)
    print("RMSE: ", rmse)

    nrmse = metrics.normalized_rmse(yobs, fcst)
    print("nRMSE: ", nrmse)

    smape = Measures.smape(yobs, fcst)
    print("SMAPE: ", smape)

    u = Measures.UStatistic(yobs, fcst)
    print("U Statistic: ", u)

    if save:
        results = {
            "method_id": method_id,
            "forecast": fcst,
            "RMSE": rmse,
            "SMAPE": smape,
            "U": u
        }
        with open("results_" + method_id + ".pkl", "wb") as handle:
            pickle.dump(results, handle)

    return rmse, nrmse, smape, u
예제 #9
0
파일: benchmarks.py 프로젝트: bekti7/pmi
def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
    """
    Run the interval forecasting benchmarks

    :param mfts: FTS model
    :param partitioner: Universe of Discourse partitioner
    :param train_data: data used to train the model
    :param test_data: data used to test the model
    :param window_key: id of the sliding window
    :param transformation: data transformation
    :param indexer: seasonal indexer
    :return: a dictionary with the benchmark results
    """
    import time
    from pyFTS.models import hofts,ifts,pwfts
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, arima, quantreg

    # NOTE(review): tmp/tmp2/tmp3/tmp4 are never read; presumably they pin class
    # references so they are captured when this function is shipped to cluster
    # nodes for remote execution — confirm before removing.
    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS,  pwfts.ProbabilisticWeightedFTS]

    tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]

    tmp4 = [arima.ARIMA, quantreg.QuantileRegression]

    tmp3 = [Measures.get_interval_statistics]

    steps_ahead = kwargs.get('steps_ahead', 1)
    method = kwargs.get('method', None)

    # Build a unique result key: benchmark-only models are keyed by name/order/alpha,
    # FTS models also by partitioner module and partition count.
    if mfts.benchmark_only:
        _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
    else:
        pttr = str(partitioner.__module__).split('.')[-1]
        _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
        mfts.partitioner = partitioner
        mfts.append_transformation(partitioner.transformation)

    _key += str(steps_ahead)
    _key += str(method) if method is not None else ""

    # Training time.
    _start = time.time()
    mfts.fit(train_data, **kwargs)
    _end = time.time()
    times = _end - _start

    # Forecasting/evaluation time is added to the training time.
    _start = time.time()
    #_sharp, _res, _cov, _q05, _q25, _q75, _q95, _w05, _w25
    metrics = Measures.get_interval_statistics(test_data, mfts, **kwargs)
    _end = time.time()
    times += _end - _start

    ret = {'key': _key, 'obj': mfts, 'sharpness': metrics[0], 'resolution': metrics[1], 'coverage': metrics[2],
           'time': times,'Q05': metrics[3], 'Q25': metrics[4], 'Q75': metrics[5], 'Q95': metrics[6],
           'winkler05': metrics[7], 'winkler25': metrics[8],
           'window': window_key,'steps': steps_ahead, 'method': method}

    return ret
예제 #10
0
def evaluate(dataset, individual, **kwargs):
    """
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :param parameters: dict with model specific arguments for fit method.
    :return: a tuple (len_lags, rmse) with the parsimony fitness value and the accuracy fitness value
    """
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.hyperparam.Evolutionary import phenotype, __measures
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
    parameters = kwargs.get('parameters', {})

    # Already-evaluated individuals carry their fitness values; echo them back.
    if individual['f1'] is not None and individual['f2'] is not None:
        return {key: individual[key] for key in __measures}

    window_rmse = []
    model_sizes = []

    for _, train, test in Util.sliding_window(dataset, window_size,
                                              train=train_rate,
                                              inc=increment_rate):
        model = phenotype(individual, train,
                          fts_method=fts_method,
                          parameters=parameters)

        forecasts = model.predict(test)

        window_rmse.append(Measures.rmse(test[model.max_lag:], forecasts[:-1]))
        model_sizes.append(len(model))

    # Lag penalty from the last trained model.
    lag_penalty = sum(model.lags) * 100

    mean_rmse = np.nanmean(window_rmse)
    mean_size = np.nanmean(model_sizes)

    # f1 trades off mean accuracy against stability; f2 trades off model size
    # against lag usage (parsimony).
    f1 = np.nansum([.6 * mean_rmse, .4 * np.nanstd(window_rmse)])
    f2 = np.nansum([.4 * mean_size, .6 * lag_penalty])

    return {'f1': f1, 'f2': f2, 'rmse': mean_rmse, 'size': mean_size}
예제 #11
0
def evaluate_individual_model(model, partitioner, train, test, window_size, time_displacement):
    """
    Train one FTS model and score it on the test series; on any failure the
    error is printed and all metrics come back as NaN (best-effort benchmark).

    :return: dict with model name, partition count, order, rmse, mape and u
    """
    import numpy as np
    from pyFTS.partitioners import Grid
    from pyFTS.benchmarks import Measures

    try:
        model.train(train, sets=partitioner.sets, order=model.order, parameters=window_size)
        forecasts = model.forecast(test, time_displacement=time_displacement, window_size=window_size)
        # Skip the first `order` observations, which have no forecast.
        targets = test[model.order:]
        predicted = forecasts[:-1]
        _rmse = Measures.rmse(targets, predicted)
        _mape = Measures.mape(targets, predicted)
        _u = Measures.UStatistic(targets, predicted)
    except Exception as e:
        print(e)
        _rmse, _mape, _u = np.nan, np.nan, np.nan

    return {'model': model.shortname, 'partitions': partitioner.partitions, 'order': model.order,
            'rmse': _rmse, 'mape': _mape, 'u': _u}
예제 #12
0
def print_distribution_statistics(original, models, steps, resolution):
    """Print a LaTeX-style table of interval/distribution CRPS per model."""
    rows = ["Model\t& Order     &  Interval & Distribution\t\\\\ \n"]
    for fts in models:
        crps_interval, crps_distr, _t1, _t2 = Measures.get_distribution_statistics(
            original, fts, steps, resolution)
        rows.append(f"{fts.shortname}\t\t& {fts.order}\t\t& "
                    f"{crps_interval}\t\t& {crps_distr}\t\\\\ \n")
    print("".join(rows))
예제 #13
0
def rolling_window_forecast_params(data, train_percent, window_size, method,
                                   params):
    """
    Forecast day by day with a rolling window inside each day, then report
    aggregate accuracy metrics over all windows.

    :param data: dataset with a datetime-like index (grouped by calendar date)
    :param train_percent: fraction of each day's samples used for training
    :param window_size: step by which the rolling window advances
    :param method: forecasting callable with signature (train, test, params)
    :param params: dict with at least 'output' and 'order'; optional 'step'
    :return: tuple (rmse, nrmse, smape, u)
    """
    # get training days
    training_days = pd.unique(data.index.date)
    fcst = []
    yobs = []

    for day in training_days:
        print("Processing :", day)
        daily_data = data[data.index.date == day]
        nsamples = len(daily_data.index)
        train_size = round(nsamples * train_percent)
        test_end = 0  # forces at least one window iteration below
        index = 0

        while test_end < nsamples:
            train_start, train_end, test_start, test_end = get_data_index(
                index, train_size, window_size, nsamples)
            # NOTE(review): slices the full `data`, not `daily_data`, using
            # indices computed from the day's sample count — windows may cross
            # day boundaries; confirm this is intended.
            train = data[train_start:train_end]
            test = data[test_start:test_end]
            index += window_size
            f = method(train, test, params)
            fcst.extend(f)
            # Skip the first `order + step - 1` observations with no forecast.
            _step = params.get('step', 1)
            _output = params['output']
            _offset = params['order'] + _step - 1
            yobs.extend(test[_output].iloc[_offset:].values)

    rmse = Measures.rmse(yobs, fcst)
    print("RMSE: ", rmse)

    nrmse = metrics.normalized_rmse(yobs, fcst)
    print("nRMSE: ", nrmse)

    smape = Measures.smape(yobs, fcst)
    print("SMAPE: ", smape)

    u = Measures.UStatistic(yobs, fcst)
    print("U Statistic: ", u)

    return rmse, nrmse, smape, u
예제 #14
0
def evaluate(dataset, individual, **kwargs):
    """
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :param parameters: dict with model specific arguments for fit method.
    :return: a tuple (len_lags, rmse) with the parsimony fitness value and the accuracy fitness value
    """
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.fcm.GA import phenotype
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)

    window_errors = []

    for _, train, test in Util.sliding_window(dataset, window_size,
                                              train=train_rate,
                                              inc=increment_rate):
        model = phenotype(individual, train)

        if model is None:
            raise Exception("Phenotype returned None")

        # Disable Universe-of-Discourse clipping before forecasting.
        model.uod_clip = False

        forecasts = model.predict(test)

        window_errors.append(
            Measures.rmse(test[model.max_lag:], forecasts[:-1]))

    # Fitness combines mean error with its dispersion across windows.
    return {'rmse': .6 * np.nanmean(window_errors) + .4 * np.nanstd(window_errors)}
예제 #15
0
def print_interval_statistics(original, models):
    """Print a LaTeX-style table of interval forecasting statistics per model."""
    out = ["Model\t& Order     & Sharpness\t\t& Resolution\t\t& Coverage & .05  & .25 & .75 & .95\t\\\\ \n"]
    for fts in models:
        sharp, res, cov, q5, q25, q75, q95 = Measures.get_interval_statistics(
            original, fts)
        out.append(f"{fts.shortname}\t\t& {fts.order}\t\t& {sharp}\t\t& {res}\t\t& "
                   f"{cov}        &{q5}        &{q25}        &{q75}        &{q95}\\\\ \n")
    print("".join(out))
예제 #16
0
def compare_residuals(data, models):
    """
    Compare residual's statistics of several models
    :param data: test data
    :param models: list of fitted FTS models
    :return: a LaTeX-formatted table (str) with one row per model
    """
    rows = ["Model\t\t& Order     & Mean      & STD       & Box-Pierce    & Box-Ljung & P-value \\\\ \n"]
    for mfts in models:
        res = residuals(data, mfts.forecast(data), mfts.order)
        box_pierce = Measures.BoxPierceStatistic(res, 10)
        box_ljung = Measures.BoxLjungStatistic(res, 10)
        cells = [mfts.shortname,
                 str(mfts.order),
                 str(round(np.mean(res), 2)),
                 str(round(np.std(res), 2)),
                 str(round(box_pierce, 2)),
                 str(round(box_ljung, 2))]
        rows.append("\t\t& ".join(cells)
                    + "\t\t& " + str(chi_squared(box_ljung, 10))
                    + "\t\\\\ \n")
    return "".join(rows)
예제 #17
0
파일: benchmarks.py 프로젝트: bekti7/pmi
def print_distribution_statistics(original, models, steps, resolution):
    """
    Run probabilistic benchmarks on given models and data and print the results

    :param data: test data
    :param models: a list of FTS models to benchmark
    :return:
    """
    table = "Model\t& Order     &  Interval & Distribution\t\\\\ \n"
    row_fmt = "{}\t\t& {}\t\t& {}\t\t& {}\t\\\\ \n"
    for fts in models:
        crps_interval, crps_distr, _t1, _t2 = Measures.get_distribution_statistics(
            original, fts, steps, resolution)
        table += row_fmt.format(fts.shortname, fts.order, crps_interval, crps_distr)
    print(table)
예제 #18
0
파일: benchmarks.py 프로젝트: bekti7/pmi
def print_interval_statistics(original, models):
    """
    Run interval benchmarks on given models and data and print the results

    :param data: test data
    :param models: a list of FTS models to benchmark
    :return:
    """
    table = "Model\t& Order     & Sharpness\t\t& Resolution\t\t& Coverage & .05  & .25 & .75 & .95\t\\\\ \n"
    row_fmt = ("{}\t\t& {}\t\t& {}\t\t& {}\t\t& {}        &"
               "{}        &{}        &{}        &{}\\\\ \n")
    for fts in models:
        stats = Measures.get_interval_statistics(original, fts)
        table += row_fmt.format(fts.shortname, fts.order, *stats)
    print(table)
예제 #19
0
def rolling_window_benchmark(data, train=0.8, **kwargs):
    """
    Benchmark every (method, order, steps_ahead[, partitioner, partitions])
    combination on one train/test split and print the RMSE of each run.

    :param data: full dataset
    :param train: train/test split fraction
    :param resample: optional resampling rule applied to `data` first (kwargs)
    :param output: column of the test data used as the target (kwargs)
    :param methods: list of model constructors to benchmark (kwargs)
    :param orders: model orders to try (kwargs, default [1, 2, 3])
    :param steps_ahead: forecast horizons to try (kwargs, default [1])
    """
    resample = __pop('resample', None, kwargs)
    output = __pop('output', None, kwargs)

    if resample:
        data = sampling.resample_data(data, resample)

    train_data, test_data = sampling.train_test_split(data, train)

    methods = __pop('methods', None, kwargs)
    orders = __pop("orders", [1, 2, 3], kwargs)
    steps_ahead = __pop('steps_ahead', [1], kwargs)

    for method in methods:
        for order in orders:
            for step in steps_ahead:
                m = method()

                if isinstance(m, fts.FTS):
                    partitioners = __pop("partitioners",
                                         [Grid.GridPartitioner], kwargs)
                    partitions = __pop("partitions", [10], kwargs)
                    # NOTE(review): this inner double loop reassigns
                    # m.partitioner each pass, so only the LAST
                    # partitioner/partition combination is actually used —
                    # confirm whether fitting inside the loop was intended.
                    for partitioner in partitioners:
                        for partition in partitions:
                            data_train_fs = partitioner(data=train_data,
                                                        npart=partition)
                            m.partitioner = data_train_fs

                # TODO: measure training time
                m.fit(train_data, **kwargs)

                # TODO: measure forecasting time
                # NOTE(review): predict() is called with no arguments — the
                # test data is never passed in; confirm against the model API.
                yhat = m.predict()
                #_start = time.time()

                # TODO: implement evaluation metrics
                _rmse = Measures.rmse(test_data[output].iloc[order:],
                                      yhat[:-step])
                print("RMSE: ", _rmse)
예제 #20
0
from pyFTS.benchmarks import Measures
from pyFTS.partitioners import Grid, Entropy
from pyFTS.models import hofts
from pyFTS.common import Membership

# Sample series: two full sine cycles sampled at 0.1 radians.
# (np, plt and pd are assumed to be imported earlier in the file — confirm.)
x = [k for k in np.arange(-2 * np.pi, 2 * np.pi, 0.1)]
y = [np.sin(k) for k in x]

rows = []

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 5])

ax.plot(y, label='Original', color='black')

# Sweep the number of grid partitions and compare first-order HOFTS accuracy.
for npart in np.arange(5, 35, 5):
    part = Grid.GridPartitioner(data=y, npart=npart)
    model = hofts.HighOrderFTS(order=1, partitioner=part)
    model.fit(y)
    forecasts = model.predict(y)

    ax.plot(forecasts[:-1], label=str(npart) + " partitions")

    rmse, mape, u = Measures.get_point_statistics(y, model)

    rows.append([npart, rmse, mape, u])

# Legend outside the axes, anchored at the top-right corner.
handles, labels = ax.get_legend_handles_labels()
lgd = ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))

# Summary table: one row per partition count.
df = pd.DataFrame(rows, columns=['Partitions', 'RMSE', 'MAPE', 'U'])
예제 #21
0
def normalized_rmse(targets, forecasts):
    """Return the RMSE scaled by the mean of the target series (nRMSE)."""
    target_values = np.array(targets) if isinstance(targets, list) else targets
    return Measures.rmse(target_values, forecasts) / np.nanmean(target_values)
예제 #22
0
파일: mvfts.py 프로젝트: zilla-liu/pyFTS
def evaluate(dataset, individual, **kwargs):
    """
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :param parameters: dict with model specific arguments for fit method.
    :return: a tuple (len_lags, rmse) with the parsimony fitness value and the accuracy fitness value
    """
    import logging
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.hyperparam.Evolutionary import __measures
    from pyFTS.hyperparam.mvfts import phenotype
    from pyFTS.models.multivariate import mvfts, wmvfts, partitioner, variable, cmvfts, grid, granular, common
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    fts_method = kwargs.get('fts_method', wmvfts.WeightedMVFTS)
    parameters = kwargs.get('parameters', {})
    # Target variable descriptor; its 'data_label' selects the column to score.
    tvar = kwargs.get('target_variable', None)

    # Already-evaluated individuals carry their fitness values; echo them back.
    if individual['f1'] is not None and individual['f2'] is not None:
        return {key: individual[key] for key in __measures}

    errors = []
    lengths = []

    # Forward the remaining kwargs to phenotype, minus the keys passed explicitly.
    kwargs2 = kwargs.copy()
    kwargs2.pop('fts_method')
    if 'parameters' in kwargs2:
        kwargs2.pop('parameters')

    for count, train, test in Util.sliding_window(dataset,
                                                  window_size,
                                                  train=train_rate,
                                                  inc=increment_rate):

        try:

            model = phenotype(individual,
                              train,
                              fts_method=fts_method,
                              parameters=parameters,
                              **kwargs2)

            forecasts = model.predict(test)

            # Skip the first max_lag observations, which have no forecast.
            rmse = Measures.rmse(
                test[tvar['data_label']].values[model.max_lag:],
                forecasts[:-1])
            lengths.append(len(model))

            errors.append(rmse)

        except Exception as ex:
            logging.exception("Error")

            # A failed window contributes NaN, which the nan-aware
            # aggregations below skip.
            lengths.append(np.nan)
            errors.append(np.nan)

    try:
        _rmse = np.nanmean(errors)
        _len = np.nanmean(lengths)

        # f1 trades off mean accuracy against its dispersion; f2 trades off
        # mean model size against its dispersion (parsimony).
        f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)])
        f2 = np.nansum([.9 * _len, .1 * np.nanstd(lengths)])

        return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len}
    except Exception as ex:
        logging.exception("Error")
        # Worst possible fitness when even the aggregation fails.
        return {'f1': np.inf, 'f2': np.inf, 'rmse': np.inf, 'size': np.inf}
예제 #23
0
파일: general.py 프로젝트: ZuoMatthew/pyFTS
from pyFTS.data import TAIEX, SP500, NASDAQ, Malaysia

# Use the first 1000 temperature readings of the Malaysia dataset.
dataset = Malaysia.get_data('temperature')[:1000]

# 20-partition grid over the Universe of Discourse.
# (Grid, hofts and Measures are assumed to be imported earlier — confirm.)
p = Grid.GridPartitioner(data=dataset, npart=20)

print(p)

model = hofts.WeightedHighOrderFTS(partitioner=p, order=2)

model.fit(dataset)  #[22, 22, 23, 23, 24])

print(model)

# In-sample point accuracy (RMSE, MAPE, Theil's U).
Measures.get_point_statistics(dataset, model)
'''
#dataset = SP500.get_data()[11500:16000]
#dataset = NASDAQ.get_data()
#print(len(dataset))


bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
                                methods=[chen.ConventionalFTS], #[pwfts.ProbabilisticWeightedFTS],
                                benchmark_models=False,
                                transformations=[None],
                                #orders=[1, 2, 3],
                                partitions=np.arange(10, 100, 2),
                                progress=False, type="point",
                                #steps_ahead=[1,2,4,6,8,10],
                                distributed=False, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
예제 #24
0
def run_probabilistic(mfts,
                      partitioner,
                      train_data,
                      test_data,
                      window_key=None,
                      **kwargs):
    """
    Probabilistic forecast benchmark function to be executed on cluster nodes
    :param mfts: FTS model
    :param partitioner: Universe of Discourse partitioner
    :param train_data: data used to train the model
    :param test_data: data used to test the model
    :param steps:
    :param resolution:
    :param window_key: id of the sliding window
    :param transformation: data transformation
    :param indexer: seasonal indexer
    :return: a dictionary with the benchmark results
    """
    import time
    import numpy as np
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.models.ensemble import ensemble
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, arima, quantreg, knn
    from pyFTS.models.seasonal import SeasonalIndexer

    # NOTE(review): tmp/tmp2/tmp3 are never read; presumably they pin class
    # references so they are captured when this function is serialized to
    # cluster nodes — confirm before removing.
    tmp = [
        hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS,
        arima.ARIMA, ensemble.AllMethodEnsembleFTS, knn.KNearestNeighbors
    ]

    tmp2 = [
        Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner
    ]

    tmp3 = [
        Measures.get_distribution_statistics, SeasonalIndexer.SeasonalIndexer,
        SeasonalIndexer.LinearSeasonalIndexer
    ]

    indexer = kwargs.get('indexer', None)

    steps_ahead = kwargs.get('steps_ahead', 1)
    method = kwargs.get('method', None)

    # Build a unique result key: benchmark-only models are keyed by
    # name/order/alpha, FTS models also by partitioner module and partitions.
    if mfts.benchmark_only:
        _key = mfts.shortname + str(
            mfts.order if mfts.order is not None else "") + str(mfts.alpha)
    else:
        pttr = str(partitioner.__module__).split('.')[-1]
        _key = mfts.shortname + " n = " + str(
            mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
        mfts.partitioner = partitioner
        mfts.append_transformation(partitioner.transformation)

    _key += str(steps_ahead)
    _key += str(method) if method is not None else ""

    if mfts.has_seasonality:
        mfts.indexer = indexer

    # Training time.
    _start = time.time()
    mfts.fit(train_data, **kwargs)
    _end = time.time()
    times = _end - _start

    # Evaluation time (_t1) is reported with the training time folded in.
    _crps1, _t1, _brier = Measures.get_distribution_statistics(
        test_data, mfts, **kwargs)
    _t1 += times

    ret = {
        'key': _key,
        'obj': mfts,
        'CRPS': _crps1,
        'time': _t1,
        'brier': _brier,
        'window': window_key,
        'steps': steps_ahead,
        'method': method
    }

    return ret
예제 #25
0
def run_point(mfts,
              partitioner,
              train_data,
              test_data,
              window_key=None,
              **kwargs):
    """
    Point forecast benchmark function to be executed on cluster nodes

    :param mfts: FTS model
    :param partitioner: Universe of Discourse partitioner
    :param train_data: data used to train the model
    :param test_data: data used to test the model
    :param window_key: id of the sliding window
    :return: a dictionary with the benchmark results (configuration key,
             fitted model, RMSE/SMAPE/U statistics, elapsed time, window id,
             steps ahead and forecasting method)
    """
    import time
    from pyFTS.models import yu, chen, hofts, pwfts, ismailefendi, sadaei, song, cheng, hwang
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, naive, arima, quantreg
    from pyFTS.common import Transformations

    # NOTE: these lists only reference the imported classes so that the
    # cluster dispatcher serializes those modules along with this function;
    # they are intentionally unused locally.
    _fts_refs = [
        song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS,
        ismailefendi.ImprovedWeightedFTS, cheng.TrendWeightedFTS,
        sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, hwang.HighOrderFTS,
        pwfts.ProbabilisticWeightedFTS
    ]
    _partitioner_refs = [
        Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner
    ]
    _benchmark_refs = [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]
    _measure_refs = [Measures.get_point_statistics]
    _transform_refs = [Transformations.Differential]

    indexer = kwargs.get('indexer', None)
    steps_ahead = kwargs.get('steps_ahead', 1)
    method = kwargs.get('method', None)

    # Build a unique identifier for this model/partitioner configuration.
    if mfts.benchmark_only:
        _key = mfts.shortname + ("" if mfts.order is None else str(mfts.order))
    else:
        pttr = str(partitioner.__module__).split('.')[-1]
        _key = "{} n = {} {} q = {}".format(mfts.shortname, mfts.order, pttr,
                                            partitioner.partitions)
        mfts.partitioner = partitioner
        mfts.append_transformation(partitioner.transformation)

    _key += str(steps_ahead)
    if method is not None:
        _key += str(method)

    # Time the training phase.
    _start = time.time()
    mfts.fit(train_data, **kwargs)
    times = time.time() - _start

    # Time the forecasting/evaluation phase and accumulate it.
    _start = time.time()
    _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts,
                                                      **kwargs)
    times += time.time() - _start

    return {
        'key': _key,
        'obj': mfts,
        'rmse': _rmse,
        'smape': _smape,
        'u': _u,
        'time': times,
        'window': window_key,
        'steps': steps_ahead,
        'method': method
    }
예제 #26
0
def simpleSearch_RMSE(train,
                      test,
                      model,
                      partitions,
                      orders,
                      save=False,
                      file=None,
                      tam=[10, 15],
                      plotforecasts=False,
                      elev=30,
                      azim=144,
                      intervals=False,
                      parameters=None,
                      partitioner=Grid.GridPartitioner,
                      transformation=None,
                      indexer=None):
    """
    Grid search over partition counts and model orders, selecting the
    configuration with the smallest RMSE on the test data.

    :param train: training data
    :param test: test data
    :param model: FTS model class to instantiate
    :param partitions: list of partition counts to evaluate
    :param orders: list of model orders to evaluate
    :param save: save the resulting figure to disk
    :param file: output file name for the figure
    :param tam: figure size [width, height]
    :param plotforecasts: also plot each candidate model's forecasts
    :param elev: elevation angle of the 3D error surface
    :param azim: azimuth angle of the 3D error surface
    :param intervals: evaluate interval forecasts instead of point forecasts
    :param parameters: extra parameters forwarded to model training
    :param partitioner: Universe of Discourse partitioner class
    :param transformation: optional data transformation
    :param indexer: seasonal indexer (unused; kept for interface compatibility)
    :return: list [best model, its forecasts, minimum RMSE]
    """
    _3d = len(orders) > 1
    ret = []
    if _3d:
        # BUG FIX: the original built an int ndarray (list of int zeros),
        # which silently truncated the float RMSE values on assignment.
        errors = np.zeros((len(orders), len(partitions)))
    else:
        errors = []
    forecasted_best = []
    fig = plt.figure(figsize=tam)
    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(train)])
        ax0.set_ylim([min(train) * 0.9, max(train) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')
    min_rmse = np.inf
    best = None

    for pc, p in enumerate(partitions, start=0):

        sets = partitioner(data=train, npart=p,
                           transformation=transformation).sets
        for oc, o in enumerate(orders, start=0):
            fts = model("q = " + str(p) + " n = " + str(o))
            fts.append_transformation(transformation)
            fts.train(train, sets=sets, order=o, parameters=parameters)
            if not intervals:
                forecasted = fts.forecast(test)
                # Non-seasonal models emit one extra step that must be
                # dropped before comparing against the held-out data.
                if not fts.has_seasonality:
                    error = Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted[:-1]))
                else:
                    error = Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted))
                # Pad with None so the forecasts align with the test series.
                for kk in range(o):
                    forecasted.insert(0, None)
                if plotforecasts:
                    ax0.plot(forecasted, label=fts.name)
            else:
                forecasted = fts.forecast_interval(test)
                error = 1.0 - Measures.rmse_interval(np.array(test[o:]),
                                                     np.array(forecasted[:-1]))
            if _3d:
                errors[oc, pc] = error
            else:
                errors.append(error)
            if error < min_rmse:
                min_rmse = error
                best = fts
                forecasted_best = forecasted

    if plotforecasts:
        ax0.plot(test, label="Original", linewidth=3.0, color="black")
    # BUG FIX: the original only drew the error surface when `_3d and not
    # plotforecasts`; with both enabled the freshly created 3D axes were
    # immediately replaced by an unlabeled 2D plot. Draw the surface
    # whenever more than one order was searched.
    if _3d:
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        surf = ax1.plot_surface(X,
                                Y,
                                errors,
                                rstride=1,
                                cstride=1,
                                antialiased=True)
    else:
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_xlabel('Number of partitions')
        ax1.set_ylabel('RMSE')
        ax1.plot(partitions, errors)
    ret.append(best)
    ret.append(forecasted_best)
    ret.append(min_rmse)

    cUtil.show_and_save_image(fig, file, save)

    return ret
예제 #27
0
    df = loader.series_to_supervised(signals[key], n_in=_order, n_out=1)
    data_input = df.iloc[:, :_order].values
    data_output = df.iloc[:, -1].values

    l = len(df.index)
    limit = l // 2
    train = data_input[:limit]
    test = data_input[limit:]

    ax[row].plot(data_output[limit + _order:], label="Original")
    ax[row].set_title(key)

    persistence_forecast = data_output[limit + _order - 1:-1]
    ax[row].plot(persistence_forecast, label="Persistence")
    _rmse = Measures.rmse(data_output[limit + _order:], persistence_forecast)
    data = [key, "Persistence", _rmse]

    evolving_model = evolvingclusterfts.EvolvingClusterFTS(
        defuzzy='weighted', membership_threshold=0.6, variance_limit=0.001)
    evolving_model.fit(train, order=_order)
    y_hat_df = pd.DataFrame(evolving_model.predict(test))
    forecasts = y_hat_df.iloc[:, -1].values
    ax[row].plot(forecasts, label="EvolvingFTS")
    _rmse = Measures.rmse(data_output[limit + _order:], forecasts[:-1])
    data = [key, "EvolvingFTS", _rmse]
    rows.append(data)

    fbem_model = FBeM.FBeM()
    fbem_model.n = _order
    fbem_model.fit(train, order=_order)
예제 #28
0
File: Util.py  Project: goldstar111/pyFTS
def sliding_window_simple_search(data, windowsize, model, partitions, orders,
                                 **kwargs):
    """
    Sliding-window grid search over partition counts and model orders,
    adopting a configuration as the new best when its mean RMSE across
    windows improves on the current best by more than ``threshold``.

    :param data: time series data
    :param windowsize: size of each sliding window
    :param model: FTS model class to instantiate
    :param partitions: list of partition counts to evaluate
    :param orders: list of model orders to evaluate
    :keyword figsize: figure size, default [10, 15]
    :keyword plotforecasts: also plot each candidate model's forecasts
    :keyword intervals: evaluate interval forecasts instead of point forecasts
    :keyword threshold: minimum RMSE improvement to adopt a new best model
    :keyword progressbar: display tqdm progress bars
    :keyword elev: elevation angle of the 3D error surface
    :keyword azim: azimuth angle of the 3D error surface
    :keyword file: output file name for the figure
    :keyword save: save the resulting figure to disk
    :return: list [best model, its forecasts]
    """

    _3d = len(orders) > 1
    ret = []
    # BUG FIX: the original built an int ndarray (list of int zeros), which
    # silently truncated the float RMSE values on assignment.
    errors = np.zeros((len(orders), len(partitions)))
    forecasted_best = []

    figsize = kwargs.get('figsize', [10, 15])
    fig = plt.figure(figsize=figsize)

    plotforecasts = kwargs.get('plotforecasts', False)
    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(data)])
        ax0.set_ylim([min(data) * 0.9, max(data) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')
    min_rmse = np.inf
    best = None

    intervals = kwargs.get('intervals', False)
    threshold = kwargs.get('threshold', 0.5)

    progressbar = kwargs.get('progressbar', None)

    rng1 = enumerate(partitions, start=0)

    if progressbar:
        from tqdm import tqdm
        rng1 = enumerate(tqdm(partitions), start=0)

    for pc, p in rng1:
        fs = Grid.GridPartitioner(data=data, npart=p)

        rng2 = enumerate(orders, start=0)

        if progressbar:
            rng2 = enumerate(tqdm(orders), start=0)

        for oc, o in rng2:
            _error = []
            for ct, train, test in Util.sliding_window(data, windowsize, 0.8,
                                                       **kwargs):
                fts = model("q = " + str(p) + " n = " + str(o), partitioner=fs)
                fts.fit(train, order=o)
                if not intervals:
                    forecasted = fts.forecast(test)
                    # Non-seasonal models emit one extra step that must be
                    # dropped before comparing against the held-out data.
                    if not fts.has_seasonality:
                        _error.append(
                            Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted[:-1])))
                    else:
                        _error.append(
                            Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted)))
                    # Pad with None so the forecasts align with the series.
                    for kk in range(o):
                        forecasted.insert(0, None)
                    if plotforecasts:
                        ax0.plot(forecasted, label=fts.name)
                else:
                    forecasted = fts.forecast_interval(test)
                    _error.append(1.0 - Measures.rmse_interval(
                        np.array(test[o:]), np.array(forecasted[:-1])))
            error = np.nanmean(_error)
            errors[oc, pc] = error
            # Only adopt a new best when the improvement is significant.
            if (min_rmse - error) > threshold:
                min_rmse = error
                best = fts
                forecasted_best = forecasted

    # BUG FIX: elev/azim were only read inside the plotforecasts branch in
    # the original, raising NameError when plotforecasts was disabled.
    elev = kwargs.get('elev', 30)
    azim = kwargs.get('azim', 144)

    if plotforecasts:
        ax0.plot(test, label="Original", linewidth=3.0, color="black")
    if _3d:
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        surf = ax1.plot_surface(X,
                                Y,
                                errors,
                                rstride=1,
                                cstride=1,
                                antialiased=True)
    else:
        # BUG FIX: the original plotted the error curve on ax0 (undefined
        # unless plotforecasts was set), swapped the axis labels, and passed
        # the full 2D array; plot the single searched order's row instead.
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_xlabel('Number of partitions')
        ax1.set_ylabel('RMSE')
        ax1.plot(partitions, errors[0])
    ret.append(best)
    ret.append(forecasted_best)

    file = kwargs.get('file', None)
    save = kwargs.get('save', False)

    Util.show_and_save_image(fig, file, save)

    return ret
예제 #29
0
def SelecaoSimples_MenorRMSE(original, parameters, modelo):
    """
    Evaluate an FTS model over several partition counts, both on the
    original series and on its first difference, keeping the configuration
    with the smallest RMSE for each, and plot all candidates.

    :param original: time series data
    :param parameters: list of partition counts to evaluate
    :param modelo: FTS model class to instantiate
    :return: list [best model on the original series, its forecasts,
             best model on the differenced series, its forecasts]
    """
    ret = []
    errors = []
    forecasted_best = []
    print("Série Original")
    fig = plt.figure(figsize=[20, 12])
    fig.suptitle("Comparação de modelos ")
    ax0 = fig.add_axes([0, 0.5, 0.65, 0.45])  # left, bottom, width, height
    ax0.set_xlim([0, len(original)])
    ax0.set_ylim([min(original), max(original)])
    ax0.set_title('Série Temporal')
    ax0.set_ylabel('F(T)')
    ax0.set_xlabel('T')
    ax0.plot(original, label="Original")
    min_rmse = np.inf
    best = None
    for p in parameters:
        sets = Grid.GridPartitioner(data=original, npart=p).sets
        fts = modelo(str(p) + " particoes")
        fts.train(original, sets=sets)
        forecasted = fts.forecast(original)
        # Align the one-step-ahead forecasts with the original series.
        forecasted.insert(0, original[0])
        ax0.plot(forecasted, label=fts.name)
        error = Measures.rmse(np.array(forecasted), np.array(original))
        print(p, error)
        errors.append(error)
        if error < min_rmse:
            min_rmse = error
            best = fts
            forecasted_best = forecasted
    handles0, labels0 = ax0.get_legend_handles_labels()
    ax0.legend(handles0, labels0)
    ax1 = fig.add_axes([0.7, 0.5, 0.3, 0.45])  # left, bottom, width, height
    ax1.set_title('Comparação dos Erros Quadráticos Médios')
    ax1.set_ylabel('RMSE')
    ax1.set_xlabel('Quantidade de Partições')
    ax1.set_xlim([min(parameters), max(parameters)])
    ax1.plot(parameters, errors)
    ret.append(best)
    ret.append(forecasted_best)
    # Repeat the search on the first-differenced series.
    print("\nSérie Diferencial")
    difffts = Transformations.differential(original)
    errors = []
    forecastedd_best = []
    ax2 = fig.add_axes([0, 0, 0.65, 0.45])  # left, bottom, width, height
    ax2.set_xlim([0, len(difffts)])
    ax2.set_ylim([min(difffts), max(difffts)])
    ax2.set_title('Série Temporal')
    ax2.set_ylabel('F(T)')
    ax2.set_xlabel('T')
    ax2.plot(difffts, label="Original")
    min_rmse = np.inf
    bestd = None
    for p in parameters:
        # BUG FIX: the original passed the GridPartitioner object itself as
        # `sets`; take its `.sets` attribute as in the loop above.
        sets = Grid.GridPartitioner(data=difffts, npart=p).sets
        fts = modelo(str(p) + " particoes")
        fts.train(difffts, sets=sets)
        forecasted = fts.forecast(difffts)
        forecasted.insert(0, difffts[0])
        ax2.plot(forecasted, label=fts.name)
        error = Measures.rmse(np.array(forecasted), np.array(difffts))
        print(p, error)
        errors.append(error)
        if error < min_rmse:
            min_rmse = error
            bestd = fts
            forecastedd_best = forecasted
    handles0, labels0 = ax2.get_legend_handles_labels()
    ax2.legend(handles0, labels0)
    ax3 = fig.add_axes([0.7, 0, 0.3, 0.45])  # left, bottom, width, height
    ax3.set_title('Comparação dos Erros Quadráticos Médios')
    ax3.set_ylabel('RMSE')
    ax3.set_xlabel('Quantidade de Partições')
    ax3.set_xlim([min(parameters), max(parameters)])
    ax3.plot(parameters, errors)
    ret.append(bestd)
    ret.append(forecastedd_best)
    return ret
예제 #30
0
        model = method(**parameters[ct])
        model.fit(train)
        start = model.order + 1
        end = start + horizon
        intervals = model.predict(test[:10],
                                  type='interval',
                                  alpha=.25,
                                  steps_ahead=horizon)
        distributions = model.predict(test[:10],
                                      type='distribution',
                                      smooth='histogram',
                                      steps_ahead=horizon,
                                      num_bins=100)
        print(
            model.name,
            Measures.get_interval_ahead_statistics(test[start:end], intervals))
        print(
            model.name,
            Measures.get_distribution_ahead_statistics(test[start:end],
                                                       distributions))

print('end')

#f, ax = plt.subplots(1, 1, figsize=[20, 5])

#ax.plot(data)
#bchmk.plot_interval(ax, forecasts, 3, "")
#print(forecasts)
'''
mu_local = 5
sigma_local = 0.25