Example #1
def execute(datasetname, dataset, **kwargs):
    # Addresses of the dispy worker nodes; defaults to the local machine.
    nodes = kwargs.get('nodes', ['127.0.0.1'])

    # 'cluster_method', 'process_jobs' and the Util/hUtil helpers are assumed
    # to be defined in the enclosing module (pyFTS-style distributed helpers).
    cluster, http_server = Util.start_dispy_cluster(cluster_method,
                                                    nodes=nodes)
    conn = hUtil.open_hyperparam_db('hyperparam.db')

    # Genetic algorithm settings: number of generations, population size,
    # crossover probability (pcruz) and mutation probability (pmut).
    ngen = kwargs.get('ngen', 70)
    npop = kwargs.get('npop', 20)
    pcruz = kwargs.get('pcruz', .8)
    pmut = kwargs.get('pmut', .2)
    option = kwargs.get('option', 1)

    jobs = []

    # Submit one independent GA run per experiment to the cluster.
    for i in range(kwargs.get('experiments', 30)):
        print("Experiment {}".format(i))
        job = cluster.submit(dataset, ngen, npop, pcruz, pmut, option)
        jobs.append(job)

    # Collect and persist the results, then shut the cluster down.
    process_jobs(jobs, datasetname, conn)

    Util.stop_dispy_cluster(cluster, http_server)
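
A minimal call-site sketch for the driver above (hypothetical: the dataset loader and every keyword value below are illustrative assumptions, not part of the original example):

from pyFTS.data import TAIEX  # assumption: any numeric series would work here

data = TAIEX.get_data()

# Run 10 distributed GA experiments against two assumed worker addresses.
execute('TAIEX', data,
        nodes=['192.168.0.10', '192.168.0.11'],
        ngen=50, npop=15, experiments=10)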
Example #2
def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
    """
    Sliding window benchmarks for FTS forecasters.

    For each data window, train and test datasets are split. For each train split, a partitioner model
    is created for each combination of partitioning method and number of partitions. And for each
    partitioner, order, steps ahead and FTS method, a forecasting model is trained.

    All trained models are then benchmarked on the test data, and the metrics are stored in a sqlite3
    database (identified by the 'file' parameter) for later analysis.

    The whole process can be distributed over a dispy cluster by setting the 'distributed' argument to
    True and passing the list of dispy node addresses in the 'nodes' parameter.

    The number of experiments is determined by the 'windowsize' and 'inc' parameters.

    :param data: the time series data to benchmark on
    :param windowsize: size of the sliding window
    :param train: percentage of the sliding window data used to train the models
    :param kwargs: dict, optional arguments

    :keyword
        benchmark_methods: a list of non-FTS models to benchmark. The default is None.
        benchmark_methods_parameters: a list of non-FTS model parameters. The default is None.
        benchmark_models: a boolean value indicating if external FTS methods will be used in the benchmark. The default is False.
        build_methods: a boolean value indicating if the default FTS methods will be used in the benchmark. The default is True.
        dataset: the dataset name, to identify the current set of benchmark results in the database.
        distributed: a boolean value indicating if the forecasting procedure will be distributed over a dispy cluster. The default is False.
        file: file path to save the results. The default is 'benchmarks.db'.
        inc: a float in the interval [0,1] indicating the percentage of the windowsize to move the window.
        methods: a list of FTS class names. The default depends on the forecasting type and contains the list of all FTS methods.
        models: a list of prebuilt FTS objects. The default is None.
        nodes: a list of dispy cluster node addresses. The default is ['127.0.0.1'].
        orders: a list of model orders (for high order models). The default is [1,2,3].
        partitions: a list of numbers of partitions of the Universe of Discourse. The default is [10].
        partitioners_models: a list of prebuilt Universe of Discourse partitioner objects. The default is None.
        partitioners_methods: a list of Universe of Discourse partitioner class names. The default is [partitioners.Grid.GridPartitioner].
        progress: if True, a progress bar is displayed during the benchmarks. The default is False.
        start: in multi step forecasting, the index of the data where forecasting starts. The default is 0.
        steps_ahead: a list of forecasting horizons, i.e., the numbers of steps ahead to forecast. The default is [1].
        tag: a name to identify the current set of benchmark results in the database.
        type: the forecasting type, one of: point (default), interval or distribution.
        transformations: a list of data transformations to apply. The default is [None].
    """

    tag = __pop('tag', None, kwargs)
    dataset = __pop('dataset', None, kwargs)

    distributed = __pop('distributed', False, kwargs)

    transformations = kwargs.get('transformations', [None])
    progress = kwargs.get('progress', None)
    type = kwargs.get("type", 'point')  # note: shadows the builtin 'type'

    orders = __pop("orders", [1, 2, 3], kwargs)

    partitioners_models = __pop("partitioners_models", None, kwargs)
    partitioners_methods = __pop("partitioners_methods",
                                 [Grid.GridPartitioner], kwargs)
    partitions = __pop("partitions", [10], kwargs)

    steps_ahead = __pop('steps_ahead', [1], kwargs)

    methods = __pop('methods', None, kwargs)

    models = __pop('models', None, kwargs)

    pool = [] if models is None else models

    if methods is None:
        if type == 'point':
            methods = get_point_methods()
        elif type == 'interval':
            methods = get_interval_methods()
        elif type == 'distribution':
            methods = get_probabilistic_methods()

    build_methods = __pop("build_methods", True, kwargs)

    if build_methods:
        # Instantiate one model per (method, order) combination.
        for method in methods:
            mfts = method()

            if mfts.is_high_order:
                for order in orders:
                    if order >= mfts.min_order:
                        mfts = method()
                        mfts.order = order
                        pool.append(mfts)
            else:
                mfts.order = 1
                pool.append(mfts)

    benchmark_models = __pop("benchmark_models", False, kwargs)

    benchmark_pool = []  # keep defined even when benchmark_models is False

    if benchmark_models is not False:

        benchmark_methods = __pop("benchmark_methods", None, kwargs)
        benchmark_methods_parameters = __pop("benchmark_methods_parameters",
                                             None, kwargs)

        if isinstance(benchmark_models, list):
            benchmark_pool = benchmark_models

        if benchmark_models is None and benchmark_methods is None:
            if type == 'point' or type == 'partition':
                benchmark_methods = get_benchmark_point_methods()
            elif type == 'interval':
                benchmark_methods = get_benchmark_interval_methods()
            elif type == 'distribution':
                benchmark_methods = get_benchmark_probabilistic_methods()

        if benchmark_methods is not None:
            for transformation in transformations:
                for count, model in enumerate(benchmark_methods, start=0):
                    par = benchmark_methods_parameters[count]
                    mfts = model(**par)
                    mfts.append_transformation(transformation)
                    benchmark_pool.append(mfts)

    if type == 'point':
        experiment_method = run_point
        synthesis_method = process_point_jobs
    elif type == 'interval':
        experiment_method = run_interval
        synthesis_method = process_interval_jobs
    elif type == 'distribution':
        experiment_method = run_probabilistic
        synthesis_method = process_probabilistic_jobs
    else:
        raise ValueError("Type parameter has an unknown value!")

    if distributed:
        import dispy, dispy.httpd

        nodes = kwargs.get("nodes", ['127.0.0.1'])
        cluster, http_server = cUtil.start_dispy_cluster(
            experiment_method, nodes)

    jobs = []

    inc = __pop("inc", 0.1, kwargs)

    if progress:
        from tqdm import tqdm
        _tdata = len(data) / (windowsize * inc)
        # Approximate totals for the progress bar; when no prebuilt
        # partitioners are given, the pool size is (methods x partitions).
        _npart = (len(partitioners_models) if partitioners_models is not None
                  else len(partitioners_methods) * len(partitions))
        _tasks = _npart * len(orders) * len(transformations) * len(steps_ahead)
        _tbcmk = len(benchmark_pool) * len(steps_ahead)
        progressbar = tqdm(total=_tdata * _tasks + _tdata * _tbcmk,
                           desc="Benchmarks:")

    file = kwargs.get('file', "benchmarks.db")

    conn = bUtil.open_benchmark_db(file)

    for ct, train, test in cUtil.sliding_window(data,
                                                windowsize,
                                                train,
                                                inc=inc,
                                                **kwargs):
        if benchmark_models is not False:
            for model in benchmark_pool:
                for step in steps_ahead:

                    kwargs['steps_ahead'] = step

                    if not distributed:
                        if progress:
                            progressbar.update(1)
                        try:
                            job = experiment_method(deepcopy(model), None,
                                                    train, test, **kwargs)
                            synthesis_method(dataset, tag, job, conn)
                        except Exception as ex:
                            print('EXCEPTION! ', model.shortname, model.order)
                            traceback.print_exc()
                    else:
                        job = cluster.submit(deepcopy(model), None, train,
                                             test, **kwargs)
                        jobs.append(job)

        partitioners_pool = []

        if partitioners_models is None:

            for transformation in transformations:

                for partition in partitions:

                    for partitioner in partitioners_methods:

                        data_train_fs = partitioner(
                            data=train,
                            npart=partition,
                            transformation=transformation)

                        partitioners_pool.append(data_train_fs)
        else:
            partitioners_pool = partitioners_models

        for step in steps_ahead:

            for partitioner in partitioners_pool:

                for _id, model in enumerate(pool, start=0):

                    kwargs['steps_ahead'] = step

                    if not distributed:
                        if progress:
                            progressbar.update(1)
                        try:
                            job = experiment_method(deepcopy(model),
                                                    deepcopy(partitioner),
                                                    train, test, **kwargs)
                            synthesis_method(dataset, tag, job, conn)
                        except Exception as ex:
                            print('EXCEPTION! ', model.shortname, model.order,
                                  partitioner.name, partitioner.partitions,
                                  str(partitioner.transformation))
                            traceback.print_exc()
                    else:
                        job = cluster.submit(deepcopy(model),
                                             deepcopy(partitioner), train,
                                             test, **kwargs)
                        job.id = _id  # associate an ID to identify jobs (if needed later)
                        jobs.append(job)


    if distributed:

        for job in jobs:
            if progress:
                progressbar.update(1)
            job()  # block until this job completes
            if job is not None and job.status == dispy.DispyJob.Finished:
                tmp = job.result
                synthesis_method(dataset, tag, tmp, conn)
            else:
                print("status", job.status)
                print("result", job.result)
                print("stdout", job.stdout)
                print("exception", job.exception)

        cluster.wait()  # wait for all jobs to finish

        cUtil.stop_dispy_cluster(cluster, http_server)

    if progress:
        progressbar.close()

    conn.close()
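
A hedged usage sketch for sliding_window_benchmarks (TAIEX is a dataset bundled with pyFTS; all keyword values below are illustrative assumptions, not a definitive configuration):

from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid

data = TAIEX.get_data()

# Local (non-distributed) point-forecast benchmark over sliding windows of
# 1000 points, moving 200 points (inc=0.2) per window.
sliding_window_benchmarks(data, 1000, train=0.8,
                          inc=0.2,
                          orders=[1, 2],
                          partitions=[10, 20],
                          partitioners_methods=[Grid.GridPartitioner],
                          progress=True,
                          type='point',
                          tag='demo',
                          dataset='TAIEX',
                          file='benchmarks.db')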
Example #3
def execute(hyperparams, datasetname, train, test, **kwargs):
    # Exhaustive grid search over the hyperparameter space. 'cluster_method',
    # 'process_jobs', 'dict_individual' and the Util/hUtil helpers are assumed
    # to be defined in the enclosing module.

    nodes = kwargs.get('nodes', ['127.0.0.1'])

    individuals = []

    if 'lags' in hyperparams:
        lags = hyperparams.pop('lags')
    else:
        lags = list(np.arange(50))  # default: candidate lag offsets 0..49

    keys_sorted = sorted(hyperparams.keys())

    # Map each hyperparameter name to its position in the product tuples.
    index = {k: i for i, k in enumerate(keys_sorted)}
        
    print("Evaluation order: \n {}".format(index))

    hp_values = [list(hyperparams[hp]) for hp in keys_sorted]
    
    print("Evaluation values: \n {}".format(hp_values))
    
    cluster, http_server = Util.start_dispy_cluster(cluster_method, nodes=nodes)
    conn = hUtil.open_hyperparam_db('hyperparam.db')

    # Iterate the full Cartesian product of hyperparameter values.
    for instance in product(*hp_values):
        partitions = instance[index['partitions']]
        partitioner = instance[index['partitioner']]
        mf = instance[index['mf']]
        alpha_cut = instance[index['alpha']]
        order = instance[index['order']]
        count = 0
        # Build lag sets cumulatively: each extra lag is an offset from the
        # previous one, so lag indexes are non-decreasing.
        for lag1 in lags:  # first lag
            _lags = [lag1]
            count += 1
            if order > 1:
                for lag2 in lags:  # offset of the second lag from the first
                    _lags2 = [lag1, lag1 + lag2]
                    count += 1
                    if order > 2:
                        for lag3 in lags:  # offset of the third lag from the second
                            count += 1
                            _lags3 = [lag1, lag1 + lag2, lag1 + lag2 + lag3]
                            individuals.append(dict_individual(mf, partitioner, partitions, order, _lags3, alpha_cut))
                    else:
                        individuals.append(
                            dict_individual(mf, partitioner, partitions, order, _lags2, alpha_cut))
            else:
                individuals.append(dict_individual(mf, partitioner, partitions, order, _lags, alpha_cut))
                
            # Submit in batches of ~50 so that memory use stays bounded.
            if count > 50:
                jobs = []

                for ind in individuals:
                    print("Testing individual {}".format(ind))
                    job = cluster.submit(ind, train, test)
                    jobs.append(job)

                process_jobs(jobs, datasetname, conn)

                count = 0

                individuals = []

    # Flush any remaining individuals that did not fill a complete batch.
    if individuals:
        jobs = []
        for ind in individuals:
            print("Testing individual {}".format(ind))
            job = cluster.submit(ind, train, test)
            jobs.append(job)
        process_jobs(jobs, datasetname, conn)

    Util.stop_dispy_cluster(cluster, http_server)
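
A hedged call-site sketch for this grid search (the integer encodings for 'partitioner' and 'mf' are assumptions about how the enclosing module decodes an individual; the dataset and split point are illustrative):

from pyFTS.data import TAIEX

data = TAIEX.get_data()
train, test = data[:3000], data[3000:]

hyperparams = {
    'order': [1, 2],
    'partitions': [10, 30],
    'partitioner': [1],    # assumed encoding, e.g. 1 = grid partitioning
    'mf': [1, 2],          # assumed encoding of membership function types
    'alpha': [0.0, 0.1],
    'lags': [1, 2, 3],     # candidate lag offsets; omit to search 0..49
}

execute(hyperparams, 'TAIEX', train, test, nodes=['127.0.0.1'])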