Example #1
def cluster_method(individual, dataset, **kwargs):
    from pyFTS.common import Util, Membership
    from pyFTS.models import hofts
    from pyFTS.partitioners import Grid, Entropy
    from pyFTS.benchmarks import Measures
    import numpy as np

    if individual['mf'] == 1:
        mf = Membership.trimf
    elif individual['mf'] == 2:
        mf = Membership.trapmf
    elif individual['mf'] == 3 and individual['partitioner'] != 2:
        mf = Membership.gaussmf
    else:
        mf = Membership.trimf

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    parameters = kwargs.get('parameters', {})

    errors = []
    sizes = []

    for count, train, test in Util.sliding_window(dataset,
                                                  window_size,
                                                  train=train_rate,
                                                  inc=increment_rate):

        if individual['partitioner'] == 1:
            partitioner = Grid.GridPartitioner(data=train,
                                               npart=individual['npart'],
                                               func=mf)
        elif individual['partitioner'] == 2:
            # the Entropy partitioner needs a minimum number of partitions
            npart = individual['npart'] if individual['npart'] > 10 else 10
            partitioner = Entropy.EntropyPartitioner(data=train,
                                                     npart=npart,
                                                     func=mf)
        else:
            # guard against an unknown code (the original would hit an
            # UnboundLocalError below)
            raise ValueError("Unknown partitioner code: {}".format(
                individual['partitioner']))

        model = hofts.WeightedHighOrderFTS(partitioner=partitioner,
                                           lags=individual['lags'],
                                           alpha_cut=individual['alpha'],
                                           order=individual['order'])
        model.fit(train)

        forecasts = model.predict(test)

        #rmse, mape, u = Measures.get_point_statistics(test, model)
        rmse = Measures.rmse(test[model.max_lag:], forecasts)

        size = len(model)

        errors.append(rmse)
        sizes.append(size)

    return {
        'parameters': individual,
        'rmse': np.nanmean(errors),
        'size': np.nanmean(sizes)  # the original averaged 'size', the last window only
    }
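
A minimal driver for cluster_method, under stated assumptions: pyFTS is installed, the synthetic random walk stands in for a real dataset, and the genotype values are illustrative.

# Hypothetical usage sketch for cluster_method above; all values are illustrative.
import numpy as np

data = np.cumsum(np.random.normal(0, 1, 2000))  # synthetic random-walk series

individual = {
    'mf': 1,            # 1 = triangular membership function
    'partitioner': 1,   # 1 = Grid, 2 = Entropy
    'npart': 35,        # number of fuzzy sets
    'lags': [1, 2],     # lag indexes used by the model
    'alpha': .05,       # alpha-cut
    'order': 2,         # model order (consistent with len(lags))
}

result = cluster_method(individual, data, window_size=800, train_rate=.8)
print(result['rmse'], result['size'])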
Example #2
def evaluation1(dataset, individual):
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    import numpy as np  # needed by the aggregation below

    try:
        results = []
        lengths = []

        for count, train, test in Util.sliding_window(dataset,
                                                      800,
                                                      train=.8,
                                                      inc=.25):
            model = phenotype(individual, train)

            if model is None:
                return None

            rmse, _, _ = Measures.get_point_statistics(test, model)
            lengths.append(len(model))

            results.append(rmse)

        # aggregate the fitness values once, after the sliding window loop
        # (the original recomputed them on every iteration)
        _lags = sum(model.lags) * 100

        rmse = np.nansum(
            [.6 * np.nanmean(results), .4 * np.nanstd(results)])
        len_lags = np.nansum([.4 * np.nanmean(lengths), .6 * _lags])

        return len_lags, rmse

    except Exception as ex:
        print("EXCEPTION!", str(ex), str(individual))
        return np.inf, np.inf  # keep the (len_lags, rmse) arity on failure
Example #3
def evaluate(dataset, individual, **kwargs):
    """
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :param parameters: dict with model specific arguments for fit method.
    :return: a dict with the fitness values f1 and f2, plus the mean rmse and model size over the windows
    """
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.hyperparam.Evolutionary import phenotype, __measures
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
    parameters = kwargs.get('parameters', {})

    if individual['f1'] is not None and individual['f2'] is not None:
        return {key: individual[key] for key in __measures}

    errors = []
    lengths = []

    for count, train, test in Util.sliding_window(dataset,
                                                  window_size,
                                                  train=train_rate,
                                                  inc=increment_rate):

        model = phenotype(individual,
                          train,
                          fts_method=fts_method,
                          parameters=parameters)

        forecasts = model.predict(test)

        rmse = Measures.rmse(test[model.max_lag:], forecasts[:-1])
        lengths.append(len(model))

        errors.append(rmse)

    _lags = sum(model.lags) * 100

    _rmse = np.nanmean(errors)
    _len = np.nanmean(lengths)

    f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)])
    f2 = np.nansum([.4 * _len, .6 * _lags])

    return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len}
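
A hedged driver for evaluate. The individual layout below (mf, partitioner, npart, order, alpha, lags, plus the cached f1/f2 slots) is an assumption about the genotype that pyFTS.hyperparam.Evolutionary.phenotype expects; TAIEX is one of the datasets bundled with pyFTS.

from pyFTS.data import TAIEX

data = TAIEX.get_data()

individual = {'mf': 1, 'partitioner': 1, 'npart': 30, 'order': 2,
              'alpha': .05, 'lags': [1, 2],
              'f1': None, 'f2': None}  # None forces a fresh evaluation

fitness = evaluate(data, individual, window_size=800, train_rate=.8)
print(fitness)  # {'f1': ..., 'f2': ..., 'rmse': ..., 'size': ...}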
Example #4
def evaluate(dataset, individual, **kwargs):
    """
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :param parameters: dict with model specific arguments for fit method.
    :return: a dict with the fitness value 'rmse', a weighted sum of the mean and standard deviation of the windowed RMSE
    """
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.fcm.GA import phenotype
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    #parameters = kwargs.get('parameters',{})

    errors = []

    for count, train, test in Util.sliding_window(dataset,
                                                  window_size,
                                                  train=train_rate,
                                                  inc=increment_rate):

        model = phenotype(individual, train)

        if model is None:
            raise Exception("Phenotype returned None")

        model.uod_clip = False

        forecasts = model.predict(test)

        rmse = Measures.rmse(
            test[model.max_lag:],
            forecasts[:-1])  #.get_point_statistics(test, model)

        errors.append(rmse)

    _rmse = np.nanmean(errors)
    _std = np.nanstd(errors)

    #print("EVALUATION {}".format(individual))
    return {'rmse': .6 * _rmse + .4 * _std}
Example #5
File: Util.py Project: goldstar111/pyFTS
# Module-level imports assumed by this snippet (benchmarks/Util.py in pyFTS):
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from pyFTS.common import Util
from pyFTS.partitioners import Grid
from pyFTS.benchmarks import Measures


def sliding_window_simple_search(data, windowsize, model, partitions, orders,
                                 **kwargs):

    _3d = len(orders) > 1
    ret = []
    # use a float matrix: the original integer-valued array silently truncated the RMSE values
    errors = np.zeros((len(orders), len(partitions)))
    forecasted_best = []

    figsize = kwargs.get('figsize', [10, 15])
    fig = plt.figure(figsize=figsize)

    plotforecasts = kwargs.get('plotforecasts', False)
    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(data)])
        ax0.set_ylim([min(data) * 0.9, max(data) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')
    min_rmse = np.inf
    best = None

    intervals = kwargs.get('intervals', False)
    threshold = kwargs.get('threshold', 0.5)

    progressbar = kwargs.get('progressbar', None)

    rng1 = enumerate(partitions, start=0)

    if progressbar:
        from tqdm import tqdm
        rng1 = enumerate(tqdm(partitions), start=0)

    for pc, p in rng1:
        fs = Grid.GridPartitioner(data=data, npart=p)

        rng2 = enumerate(orders, start=0)

        if progressbar:
            rng2 = enumerate(tqdm(orders), start=0)

        for oc, o in rng2:
            _error = []
            for ct, train, test in Util.sliding_window(data, windowsize, 0.8,
                                                       **kwargs):
                fts = model("q = " + str(p) + " n = " + str(o), partitioner=fs)
                fts.fit(train, order=o)
                if not intervals:
                    forecasted = fts.forecast(test)
                    if not fts.has_seasonality:
                        _error.append(
                            Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted[:-1])))
                    else:
                        _error.append(
                            Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted)))
                    for kk in range(o):
                        forecasted.insert(0, None)
                    if plotforecasts: ax0.plot(forecasted, label=fts.name)
                else:
                    forecasted = fts.forecast_interval(test)
                    _error.append(1.0 - Measures.rmse_interval(
                        np.array(test[o:]), np.array(forecasted[:-1])))
            error = np.nanmean(_error)
            errors[oc, pc] = error
            if (min_rmse - error) > threshold:
                min_rmse = error
                best = fts
                forecasted_best = forecasted

    # print(min_rmse)
    if plotforecasts:
        # handles0, labels0 = ax0.get_legend_handles_labels()
        # ax0.legend(handles0, labels0)
        ax0.plot(test, label="Original", linewidth=3.0, color="black")
    # ax1 = fig.add_axes([0.6, 0.5, 0.45, 0.45], projection='3d')
    if _3d:
        # read the view angles before creating the 3D axes (the original fetched
        # them only inside the plotforecasts branch, raising a NameError otherwise)
        elev = kwargs.get('elev', 30)
        azim = kwargs.get('azim', 144)
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        surf = ax1.plot_surface(X,
                                Y,
                                errors,
                                rstride=1,
                                cstride=1,
                                antialiased=True)
    else:
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_ylabel('Number of partitions')
        ax1.set_xlabel('RMSE')
        ax1.plot(errors.flatten(), partitions)  # the original plotted on ax0, which may not exist here
    ret.append(best)
    ret.append(forecasted_best)

    # plt.tight_layout()

    file = kwargs.get('file', None)
    save = kwargs.get('save', False)

    Util.show_and_save_image(fig, file, save)

    return ret
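
A sketch of how this search could be invoked, sweeping partitions and orders for one FTS class. It assumes an FTS constructor that accepts a name as its first positional argument, as in the pyFTS version this snippet comes from; the dataset and value ranges are illustrative.

from pyFTS.models import hofts
from pyFTS.data import NASDAQ

data = NASDAQ.get_data()

best_model, best_forecasts = sliding_window_simple_search(
    data, 800, hofts.HighOrderFTS,
    partitions=[10, 20, 30],
    orders=[1, 2, 3],
    plotforecasts=False)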
Example #6
def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
    """
    Sliding window benchmarks for FTS forecasters.

    For each data window, train and test datasets will be split. For each train split, a partitioner
    model will be created for each number of partitions and partitioning method. And for each partitioner,
    order, steps ahead and FTS method a forecasting model will be trained.

    Then all trained models are benchmarked on the test data and the metrics are stored in a sqlite3 database
    (identified by the 'file' parameter) for posterior analysis.

    All of this can be distributed over a dispy cluster, by setting the attribute 'distributed' to True and
    informing the list of dispy nodes in the 'nodes' parameter.

    The number of experiments is determined by the 'windowsize' and 'inc' parameters.

    :param data: test data
    :param windowsize: size of sliding window
    :param train: percentage of the sliding window data used to train the models
    :param kwargs: dict, optional arguments

    :keyword
        benchmark_methods:  a list with Non FTS models to benchmark. The default is None.
        benchmark_methods_parameters:  a list with Non FTS models parameters. The default is None.
        benchmark_models: A boolean value indicating if external FTS methods will be used on benchmark. The default is False.
        build_methods: A boolean value indicating if the default FTS methods will be used on benchmark. The default is True.
        dataset: the dataset name to identify the current set of benchmarks results on database.
        distributed: A boolean value indicating if the forecasting procedure will be distributed in a dispy cluster. The default is False.
        file: file path to save the results. The default is benchmarks.db.
        inc: a float on interval [0,1] indicating the percentage of the windowsize to move the window
        methods: a list with FTS class names. The default depends on the forecasting type and contains the list of all FTS methods.
        models: a list with prebuilt FTS objects. The default is None.
        nodes: a list with the dispy cluster nodes addresses. The default is [127.0.0.1].
        orders: a list with orders of the models (for high order models). The default is [1,2,3].
        partitions: a list with the numbers of partitions on the Universe of Discourse. The default is [10].
        partitioners_models: a list with prebuilt Universe of Discourse partitioners objects. The default is None.
        partitioners_methods: a list with Universe of Discourse partitioners class names. The default is [partitioners.Grid.GridPartitioner].
        progress: If true a progress bar will be displayed during the benchmarks. The default is False.
        start: in the multi step forecasting, the index of the data where to start forecasting. The default is 0.
        steps_ahead: a list with  the forecasting horizons, i. e., the number of steps ahead to forecast. The default is 1.
        tag: a name to identify the current set of benchmarks results on database.
        type: the forecasting type, one of these values: point(default), interval or distribution. The default is point.
        transformations: a list with data transformations to apply. The default is [None].
    """

    tag = __pop('tag', None, kwargs)
    dataset = __pop('dataset', None, kwargs)

    distributed = __pop('distributed', False, kwargs)

    transformations = kwargs.get('transformations', [None])
    progress = kwargs.get('progress', None)
    type = kwargs.get("type", 'point')

    orders = __pop("orders", [1, 2, 3], kwargs)

    partitioners_models = __pop("partitioners_models", None, kwargs)
    partitioners_methods = __pop("partitioners_methods",
                                 [Grid.GridPartitioner], kwargs)
    partitions = __pop("partitions", [10], kwargs)

    steps_ahead = __pop('steps_ahead', [1], kwargs)

    methods = __pop('methods', None, kwargs)

    models = __pop('models', None, kwargs)

    pool = [] if models is None else models

    if methods is None:
        if type == 'point':
            methods = get_point_methods()
        elif type == 'interval':
            methods = get_interval_methods()
        elif type == 'distribution':
            methods = get_probabilistic_methods()

    build_methods = __pop("build_methods", True, kwargs)

    if build_methods:
        for method in methods:
            mfts = method()

            if mfts.is_high_order:
                for order in orders:
                    if order >= mfts.min_order:
                        mfts = method()
                        mfts.order = order
                        pool.append(mfts)
            else:
                mfts.order = 1
                pool.append(mfts)

    benchmark_models = __pop("benchmark_models", False, kwargs)

    benchmark_pool = []  # ensure it exists even when benchmark models are disabled

    if benchmark_models is not False:

        benchmark_methods = __pop("benchmark_methods", None, kwargs)
        benchmark_methods_parameters = __pop("benchmark_methods_parameters",
                                             None, kwargs)

        benchmark_pool = [] if ( benchmark_models is None or not isinstance(benchmark_models, list)) \
            else benchmark_models

        if benchmark_models is None and benchmark_methods is None:
            if type == 'point' or type == 'partition':
                benchmark_methods = get_benchmark_point_methods()
            elif type == 'interval':
                benchmark_methods = get_benchmark_interval_methods()
            elif type == 'distribution':
                benchmark_methods = get_benchmark_probabilistic_methods()

        if benchmark_methods is not None:
            for transformation in transformations:
                for count, model in enumerate(benchmark_methods, start=0):
                    par = benchmark_methods_parameters[count]
                    mfts = model(**par)
                    mfts.append_transformation(transformation)
                    benchmark_pool.append(mfts)

    if type == 'point':
        experiment_method = run_point
        synthesis_method = process_point_jobs
    elif type == 'interval':
        experiment_method = run_interval
        synthesis_method = process_interval_jobs
    elif type == 'distribution':
        experiment_method = run_probabilistic
        synthesis_method = process_probabilistic_jobs
    else:
        raise ValueError("Type parameter has an unknown value!")

    if distributed:
        import dispy, dispy.httpd

        nodes = kwargs.get("nodes", ['127.0.0.1'])
        cluster, http_server = cUtil.start_dispy_cluster(
            experiment_method, nodes)

    jobs = []

    inc = __pop("inc", 0.1, kwargs)

    if progress:
        from tqdm import tqdm
        _tdata = len(data) / (windowsize * inc)
        # partitioners_models may be None (the default); in that case one
        # partitioner is built per (method, partition) pair on each window
        _npart = len(partitioners_models) if partitioners_models is not None \
            else len(partitioners_methods) * len(partitions)
        _tasks = _npart * len(orders) * len(transformations) * len(steps_ahead)
        _tbcmk = len(benchmark_pool) * len(steps_ahead)
        progressbar = tqdm(total=_tdata * _tasks + _tdata * _tbcmk,
                           desc="Benchmarks:")

    file = kwargs.get('file', "benchmarks.db")

    conn = bUtil.open_benchmark_db(file)

    for ct, train, test in cUtil.sliding_window(data,
                                                windowsize,
                                                train,
                                                inc=inc,
                                                **kwargs):
        if benchmark_models is not False:
            for model in benchmark_pool:
                for step in steps_ahead:

                    kwargs['steps_ahead'] = step

                    if not distributed:
                        if progress:
                            progressbar.update(1)
                        try:
                            job = experiment_method(deepcopy(model), None,
                                                    train, test, **kwargs)
                            synthesis_method(dataset, tag, job, conn)
                        except Exception as ex:
                            print('EXCEPTION! ', model.shortname, model.order)
                            traceback.print_exc()
                    else:
                        job = cluster.submit(deepcopy(model), None, train,
                                             test, **kwargs)
                        jobs.append(job)

        partitioners_pool = []

        if partitioners_models is None:

            for transformation in transformations:

                for partition in partitions:

                    for partitioner in partitioners_methods:

                        data_train_fs = partitioner(
                            data=train,
                            npart=partition,
                            transformation=transformation)

                        partitioners_pool.append(data_train_fs)
        else:
            partitioners_pool = partitioners_models

        for step in steps_ahead:

            for partitioner in partitioners_pool:

                for _id, model in enumerate(pool, start=0):

                    kwargs['steps_ahead'] = step

                    if not distributed:
                        if progress:
                            progressbar.update(1)
                        try:
                            job = experiment_method(deepcopy(model),
                                                    deepcopy(partitioner),
                                                    train, test, **kwargs)
                            synthesis_method(dataset, tag, job, conn)
                        except Exception as ex:
                            print('EXCEPTION! ', model.shortname, model.order,
                                  partitioner.name, partitioner.partitions,
                                  str(partitioner.transformation))
                            traceback.print_exc()
                    else:
                        job = cluster.submit(deepcopy(model),
                                             deepcopy(partitioner), train,
                                             test, **kwargs)
                        job.id = _id  # associate an ID to identify jobs (if needed later)
                        jobs.append(job)

    if progress:
        progressbar.close()

    if distributed:

        for job in jobs:
            if progress:
                progressbar.update(1)
            job()
            if job.status == dispy.DispyJob.Finished and job.result is not None:
                tmp = job.result
                synthesis_method(dataset, tag, tmp, conn)
            else:
                print("status", job.status)
                print("result", job.result)
                print("stdout", job.stdout)
                print("stderr", job.exception)

        cluster.wait()  # wait for all jobs to finish

        cUtil.stop_dispy_cluster(cluster, http_server)

    conn.close()
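
For reference, a sketch of a typical call, close to the usage shown in the pyFTS documentation: benchmark two point forecasters over a sliding window and store the metrics in a sqlite3 file. Dataset and parameter values are illustrative.

from pyFTS.data import TAIEX
from pyFTS.models import chen, hofts
from pyFTS.partitioners import Grid

data = TAIEX.get_data()

sliding_window_benchmarks(data, 1000, train=0.8, inc=0.2,
                          methods=[chen.ConventionalFTS, hofts.HighOrderFTS],
                          orders=[1, 2, 3],
                          partitions=[20, 35, 50],
                          partitioners_methods=[Grid.GridPartitioner],
                          type='point',
                          tag='demo', dataset='TAIEX',
                          file='benchmarks.db')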
Example #7
File: mvfts.py Project: zilla-liu/pyFTS
def evaluate(dataset, individual, **kwargs):
    """
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :param parameters: dict with model specific arguments for fit method.
    :return: a dict with the fitness values f1 and f2, plus the mean rmse and model size over the windows
    """
    import logging
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.hyperparam.Evolutionary import __measures
    from pyFTS.hyperparam.mvfts import phenotype
    from pyFTS.models.multivariate import mvfts, wmvfts, partitioner, variable, cmvfts, grid, granular, common
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    fts_method = kwargs.get('fts_method', wmvfts.WeightedMVFTS)
    parameters = kwargs.get('parameters', {})
    tvar = kwargs.get('target_variable', None)

    if individual['f1'] is not None and individual['f2'] is not None:
        return {key: individual[key] for key in __measures}

    errors = []
    lengths = []

    kwargs2 = kwargs.copy()
    kwargs2.pop('fts_method', None)  # default used to avoid a KeyError when absent
    kwargs2.pop('parameters', None)

    for count, train, test in Util.sliding_window(dataset,
                                                  window_size,
                                                  train=train_rate,
                                                  inc=increment_rate):

        try:

            model = phenotype(individual,
                              train,
                              fts_method=fts_method,
                              parameters=parameters,
                              **kwargs2)

            forecasts = model.predict(test)

            rmse = Measures.rmse(
                test[tvar['data_label']].values[model.max_lag:],
                forecasts[:-1])
            lengths.append(len(model))

            errors.append(rmse)

        except Exception as ex:
            logging.exception("Error")

            lengths.append(np.nan)
            errors.append(np.nan)

    try:
        _rmse = np.nanmean(errors)
        _len = np.nanmean(lengths)

        f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)])
        f2 = np.nansum([.9 * _len, .1 * np.nanstd(lengths)])

        return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len}
    except Exception as ex:
        logging.exception("Error")
        return {'f1': np.inf, 'f2': np.inf, 'rmse': np.inf, 'size': np.inf}
Example #8
# Module-level imports assumed by this snippet:
import time
import datetime
import numpy as np
import dispy
import dispy.httpd
from pyFTS.common import Util
from pyFTS.partitioners import Grid
from pyFTS.benchmarks import benchmarks, arima, Util as bUtil


def ahead_sliding_window(data,
                         windowsize,
                         steps,
                         resolution,
                         train=0.8,
                         inc=0.1,
                         models=None,
                         partitioners=[Grid.GridPartitioner],
                         partitions=[10],
                         max_order=3,
                         transformation=None,
                         indexer=None,
                         dump=False,
                         benchmark_models=None,
                         benchmark_models_parameters=None,
                         save=False,
                         file=None,
                         synthetic=False,
                         nodes=None):
    """
    Distributed sliding window benchmarks for FTS probabilistic forecasters
    :param data: 
    :param windowsize: size of sliding window
    :param train: percentual of sliding window data used to train the models
    :param steps: 
    :param resolution: 
    :param models: FTS point forecasters
    :param partitioners: Universe of Discourse partitioner
    :param partitions: the max number of partitions on the Universe of Discourse 
    :param max_order: the max order of the models (for high order models)
    :param transformation: data transformation
    :param indexer: seasonal indexer
    :param dump: 
    :param save: save results
    :param file: file path to save the results
    :param synthetic: if true only the average and standard deviation of the results
    :param nodes: list of cluster nodes to distribute tasks
    :param depends: list of module dependencies 
    :return: DataFrame with the results 
    """

    alphas = [0.05, 0.25]

    if benchmark_models is None and models is None:
        benchmark_models = [
            arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA
        ]

    if benchmark_models_parameters is None:
        benchmark_models_parameters = [(1, 0, 0), (1, 0, 1), (2, 0, 0),
                                       (2, 0, 1), (2, 0, 2)]

    cluster = dispy.JobCluster(benchmarks.run_ahead,
                               nodes=nodes)  # , depends=dependencies)

    http_server = dispy.httpd.DispyHTTPServer(cluster)

    _process_start = time.time()

    print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now()))

    pool = []
    jobs = []
    objs = {}
    crps_interval = {}
    crps_distr = {}
    times1 = {}
    times2 = {}

    if models is None:
        models = benchmarks.get_probabilistic_methods()

    for model in models:
        mfts = model("")

        if mfts.is_high_order:
            for order in np.arange(1, max_order + 1):
                if order >= mfts.min_order:
                    mfts = model("")
                    mfts.order = order
                    pool.append(mfts)
        else:
            pool.append(mfts)

    if benchmark_models is not None:
        for count, model in enumerate(benchmark_models, start=0):
            for a in alphas:
                par = benchmark_models_parameters[count]
                mfts = model(str(par if par is not None else ""),
                             alpha=a,
                             dist=True)
                mfts.order = par
                pool.append(mfts)

    experiments = 0
    for ct, train, test in Util.sliding_window(data,
                                               windowsize,
                                               train,
                                               inc=inc):
        experiments += 1

        benchmarks_only = {}

        if dump: print('\nWindow: {0}\n'.format(ct))

        for partition in partitions:

            for partitioner in partitioners:

                data_train_fs = partitioner(train,
                                            partition,
                                            transformation=transformation)

                for _id, m in enumerate(pool, start=0):
                    if m.benchmark_only and m.shortname in benchmarks_only:
                        continue
                    else:
                        benchmarks_only[m.shortname] = m
                    job = cluster.submit(m, data_train_fs, train, test, steps,
                                         resolution, ct, transformation,
                                         indexer)
                    job.id = _id  # associate an ID to identify jobs (if needed later)
                    jobs.append(job)

    for job in jobs:
        tmp = job()
        if job.status == dispy.DispyJob.Finished and tmp is not None:
            if tmp['key'] not in objs:
                objs[tmp['key']] = tmp['obj']
                crps_interval[tmp['key']] = []
                crps_distr[tmp['key']] = []
                times1[tmp['key']] = []
                times2[tmp['key']] = []
            # plain list appends ('append_rhs' is not a list method)
            crps_interval[tmp['key']].append(tmp['CRPS_Interval'])
            crps_distr[tmp['key']].append(tmp['CRPS_Distribution'])
            times1[tmp['key']].append(tmp['TIME_Interval'])
            times2[tmp['key']].append(tmp['TIME_Distribution'])

        else:
            print(job.exception)
            print(job.stdout)

    _process_end = time.time()

    print("Process End: {0: %H:%M:%S}".format(datetime.datetime.now()))

    print("Process Duration: {0}".format(_process_end - _process_start))

    cluster.wait()  # wait for all jobs to finish

    cluster.print_status()

    http_server.shutdown()  # this waits until browser gets all updates
    cluster.close()

    return bUtil.save_dataframe_ahead(experiments, file, objs, crps_interval,
                                      crps_distr, times1, times2, save,
                                      synthetic)
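
A hedged invocation sketch: it requires a dispy node running and reachable from the caller, and 'data' is any numeric series loaded beforehand; horizon, resolution and the remaining values are illustrative.

# Hypothetical call (assumes a dispy node is listening on localhost).
df = ahead_sliding_window(data, 800, steps=10, resolution=100,
                          train=0.8, inc=0.2,
                          partitions=[35], max_order=3,
                          nodes=['127.0.0.1'],
                          save=True, file='ahead_benchmarks.csv')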
Example #9
# Module-level imports: as in Example #8 (time, datetime, dispy and the pyFTS
# benchmark modules), plus build_model_pool_point from the same module.
def point_sliding_window(data,
                         windowsize,
                         train=0.8,
                         inc=0.1,
                         models=None,
                         partitioners=[Grid.GridPartitioner],
                         partitions=[10],
                         max_order=3,
                         transformation=None,
                         indexer=None,
                         dump=False,
                         benchmark_models=None,
                         benchmark_models_parameters=None,
                         save=False,
                         file=None,
                         sintetic=False,
                         nodes=None,
                         depends=None):
    """
    Distributed sliding window benchmarks for FTS point forecasters
    :param data: 
    :param windowsize: size of sliding window
    :param train: percentual of sliding window data used to train the models
    :param inc: percentual of window is used do increment 
    :param models: FTS point forecasters
    :param partitioners: Universe of Discourse partitioner
    :param partitions: the max number of partitions on the Universe of Discourse 
    :param max_order: the max order of the models (for high order models)
    :param transformation: data transformation
    :param indexer: seasonal indexer
    :param dump: 
    :param benchmark_models: Non FTS models to benchmark
    :param benchmark_models_parameters: Non FTS models parameters
    :param save: save results
    :param file: file path to save the results
    :param sintetic: if true only the average and standard deviation of the results
    :param nodes: list of cluster nodes to distribute tasks
    :param depends: list of module dependencies 
    :return: DataFrame with the results
    """

    cluster = dispy.JobCluster(benchmarks.run_point,
                               nodes=nodes)  #, depends=dependencies)

    http_server = dispy.httpd.DispyHTTPServer(cluster)

    _process_start = time.time()

    print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now()))

    jobs = []
    objs = {}
    rmse = {}
    smape = {}
    u = {}
    times = {}

    pool = build_model_pool_point(models, max_order, benchmark_models,
                                  benchmark_models_parameters)

    experiments = 0
    for ct, train, test in Util.sliding_window(data, windowsize, train, inc):
        experiments += 1

        benchmarks_only = {}

        if dump: print('\nWindow: {0}\n'.format(ct))

        for partition in partitions:

            for partitioner in partitioners:

                data_train_fs = partitioner(train,
                                            partition,
                                            transformation=transformation)

                for _id, m in enumerate(pool, start=0):
                    if m.benchmark_only and m.shortname in benchmarks_only:
                        continue
                    else:
                        benchmarks_only[m.shortname] = m
                    job = cluster.submit(m, data_train_fs, train, test, ct,
                                         transformation)
                    job.id = _id  # associate an ID to identify jobs (if needed later)
                    jobs.append(job)

    for job in jobs:
        tmp = job()
        if job.status == dispy.DispyJob.Finished and tmp is not None:
            if tmp['key'] not in objs:
                objs[tmp['key']] = tmp['obj']
                rmse[tmp['key']] = []
                smape[tmp['key']] = []
                u[tmp['key']] = []
                times[tmp['key']] = []
            rmse[tmp['key']].append(tmp['rmse'])
            smape[tmp['key']].append(tmp['smape'])
            u[tmp['key']].append(tmp['u'])
            times[tmp['key']].append(tmp['time'])
            print(tmp['key'], tmp['window'])
        else:
            print(job.exception)
            print(job.stdout)

    _process_end = time.time()

    print("Process End: {0: %H:%M:%S}".format(datetime.datetime.now()))

    print("Process Duration: {0}".format(_process_end - _process_start))

    cluster.wait()  # wait for all jobs to finish

    cluster.print_status()

    http_server.shutdown()  # this waits until browser gets all updates
    cluster.close()

    return bUtil.save_dataframe_point(experiments, file, objs, rmse, save,
                                      sintetic, smape, times, u)
Example #10
    BSTS.ARIMA
]
parameters = [{},
              {'order': (2, 0, 0)},
              {'order': 1, 'dist': True},
              {'order': (2, 0, 0)}]

from pyFTS.benchmarks import Measures

horizon = 5

for ct, train, test in Util.sliding_window(data, 1000, 0.8, inc=.5):
    print('data window {}'.format(ct))
    for midx, method in enumerate(methods):  # avoid shadowing the outer 'ct'
        model = method(**parameters[midx])
        model.fit(train)
        start = model.order + 1
        end = start + horizon
        intervals = model.predict(test[:10],
                                  type='interval',
                                  alpha=.25,
                                  steps_ahead=horizon)
        distributions = model.predict(test[:10],
                                      type='distribution',
                                      smooth='histogram',
                                      steps_ahead=horizon,
                                      num_bins=100)
Example #11
# Assumes a dispy JobCluster 'cluster' and the series 'data' were created earlier.
jobs = []

models = []

for order in [1, 2, 3]:
    if order == 1:
        model = nsfts.NonStationaryFTS("")
    else:
        model = honsfts.HighOrderNonStationaryFTS("")

    model.order = order

    models.append(model)

for ct, train, test in cUtil.sliding_window(data, 300):
    for partition in np.arange(5, 100, 1):
        tmp_partitioner = Grid.GridPartitioner(train, partition)
        partitioner = partitioners.PolynomialNonStationaryPartitioner(train, tmp_partitioner,
                                                                      window_size=35, degree=1)
        for model in models:
            # print(model.shortname, partition, model.order)
            #job = evaluate_individual_model(model, train, test)
            job = cluster.submit(deepcopy(model), deepcopy(partitioner), train, test, 35, 240)
            job.id = ct + model.order*100
            jobs.append(job)

results = {}

for job in jobs:
    tmp = job()
Example #12
# Assumes 'dataset' was loaded earlier; module-level imports for this snippet:
from pyFTS.common import Transformations, Util as cUtil
from pyFTS.benchmarks import Measures, naive

diff = Transformations.Differential(lag=1)

train = dataset[:1000]
test = dataset[1000:]

#grid = Grid.GridPartitioner(data=train, transformation=diff)
#model = pwfts.ProbabilisticWeightedFTS(partitioner=grid)
#model.append_transformation(diff)

model = naive.Naive()

model.fit(train)

for ct, ttrain, ttest in cUtil.sliding_window(test, 1000, .95, inc=.5):
    if model.shortname not in ('PWFTS', 'Naive'):
        model.predict(ttrain)
    print(ttest)
    if len(ttest) > 0:
        forecasts = model.predict(ttest, steps_ahead=10)
        measures = Measures.get_point_ahead_statistics(ttest[1:11], forecasts)
        print(measures)  # only defined when the window yielded test data
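
The Differential transformation created above is only exercised by the commented-out block; a sketch of that path, restating those lines with a fit call added:

from pyFTS.partitioners import Grid
from pyFTS.models import pwfts

grid = Grid.GridPartitioner(data=train, transformation=diff)
model = pwfts.ProbabilisticWeightedFTS(partitioner=grid)
model.append_transformation(diff)
model.fit(train)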
'''
Disabled alternative: fit a non-stationary FTS on the same data.
from pyFTS.models.nonstationary import partitioners as nspart, nsfts, honsfts
fs = nspart.simplenonstationary_gridpartitioner_builder(data=train,npart=35,transformation=None)
print(fs)
#model = honsfts.HighOrderNonStationaryFTS(partitioner=fs, order=2)
model = nsfts.WeightedNonStationaryFTS(partitioner=fs)
model.fit(train)
'''
Example #13
# Module-level imports assumed: time, datetime, multiprocessing, numpy as np,
# copy.deepcopy, joblib's Parallel and delayed, and the pyFTS benchmarks/Util modules.
def point_sliding_window(data,
                         windowsize,
                         train=0.8,
                         models=None,
                         partitioners=[Grid.GridPartitioner],
                         partitions=[10],
                         max_order=3,
                         transformation=None,
                         indexer=None,
                         dump=False,
                         save=False,
                         file=None,
                         sintetic=False):
    """
    Parallel sliding window benchmarks for FTS point forecasters
    :param data: 
    :param windowsize: size of sliding window
    :param train: percentual of sliding window data used to train the models
    :param models: FTS point forecasters
    :param partitioners: Universe of Discourse partitioner
    :param partitions: the max number of partitions on the Universe of Discourse 
    :param max_order: the max order of the models (for high order models)
    :param transformation: data transformation
    :param indexer: seasonal indexer
    :param dump: 
    :param save: save results
    :param file: file path to save the results
    :param sintetic: if true only the average and standard deviation of the results
    :return: DataFrame with the results
    """
    _process_start = time.time()

    print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now()))

    num_cores = multiprocessing.cpu_count()

    pool = []

    objs = {}
    rmse = {}
    smape = {}
    u = {}
    times = {}

    for model in benchmarks.get_point_methods():
        mfts = model("")

        if mfts.is_high_order:
            for order in np.arange(1, max_order + 1):
                if order >= mfts.min_order:
                    mfts = model("")
                    mfts.order = order
                    pool.append(mfts)
        else:
            pool.append(mfts)

    experiments = 0
    for ct, train, test in Util.sliding_window(data, windowsize, train):
        experiments += 1

        if dump: print('\nWindow: {0}\n'.format(ct))

        for partition in partitions:

            for partitioner in partitioners:

                data_train_fs = partitioner(train,
                                            partition,
                                            transformation=transformation)

                results = Parallel(n_jobs=num_cores)(delayed(run_point)(
                    deepcopy(m), deepcopy(data_train_fs), deepcopy(train),
                    deepcopy(test), transformation) for m in pool)

                for tmp in results:
                    if tmp['key'] not in objs:
                        objs[tmp['key']] = tmp['obj']
                        rmse[tmp['key']] = []
                        smape[tmp['key']] = []
                        u[tmp['key']] = []
                        times[tmp['key']] = []
                    rmse[tmp['key']].append(tmp['rmse'])
                    smape[tmp['key']].append(tmp['smape'])
                    u[tmp['key']].append(tmp['u'])
                    times[tmp['key']].append(tmp['time'])

    _process_end = time.time()

    print("Process End: {0: %H:%M:%S}".format(datetime.datetime.now()))

    print("Process Duration: {0}".format(_process_end - _process_start))

    return Util.save_dataframe_point(experiments, file, objs, rmse, save,
                                     sintetic, smape, times, u)
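
A minimal driver for the parallel version, assuming joblib and the pyFTS benchmark modules are importable; it runs the default point methods across the local CPU cores, with 'data' being any numeric series loaded beforehand.

# Hypothetical call of the parallel benchmark above.
df = point_sliding_window(data, 1000, train=0.8,
                          partitions=[10, 20], max_order=3,
                          dump=True,
                          save=True, file='point_benchmarks.csv')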
Example #14
# Assumes 'df', 'train_mv' and the variables 'vhour' and 'vtemp' were defined earlier.
vload = variable.Variable("Load",
                          data_label="load",
                          alias='load',
                          partitioner=Grid.GridPartitioner,
                          npart=20,
                          func=Membership.gaussmf,
                          data=train_mv,
                          alpha_cut=.3)

rows = []

time_generator = lambda x: pd.to_datetime(x) + pd.to_timedelta(1, unit='h')

for ct, train, test in cUtil.sliding_window(df,
                                            windowsize=32000,
                                            train=.98,
                                            inc=.05):
    print('Window {}'.format(ct))
    for order in [1, 2, 3]:
        for knn in [1, 2, 3]:
            model = granular.GranularWMVFTS(
                explanatory_variables=[vhour, vtemp, vload],
                target_variable=vload,
                order=order,
                knn=knn)

            model.fit(train)

            forecasts1 = model.predict(test, type='multivariate')
            forecasts2 = model.predict(test,
                                       type='multivariate',