Example #1
import time

import numpy as np

from pyFTS.hyperparam import Util as hUtil

# evaluate, GeneticAlgorithm and process_experiment are assumed to be
# defined in the enclosing module.


def execute(dataset, **kwargs):
    file = kwargs.get('file', 'hyperparam.db')

    # Open (or create) the database where the experiment results are stored
    conn = hUtil.open_hyperparam_db(file)

    experiments = kwargs.get('experiments', 30)

    distributed = kwargs.get('distributed', False)

    if distributed == 'dispy':
        # Start a dispy cluster that will run the fitness evaluations
        import dispy
        from pyFTS.distributed import dispy as dUtil
        nodes = kwargs.get('nodes', ['127.0.0.1'])
        cluster, http_server = dUtil.start_dispy_cluster(evaluate, nodes=nodes)
        kwargs['cluster'] = cluster

    ret = []
    for i in np.arange(experiments):
        print("Experiment {}".format(i))

        # Run one complete evolution and measure its wall-clock time
        start = time.time()
        ret, statistics = GeneticAlgorithm(dataset, **kwargs)
        end = time.time()
        ret['time'] = end - start
        experiment = {'individual': ret, 'statistics': statistics}

        # Persist the experiment results in the database
        ret = process_experiment(experiment, '', conn)

    if distributed == 'dispy':
        # Shut down the cluster and its monitoring HTTP server
        dUtil.stop_dispy_cluster(cluster, http_server)

    return ret
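
A minimal usage sketch for the function above; the synthetic series and the node addresses are illustrative assumptions, not part of the original example.

import numpy as np

# Illustrative synthetic series; any one-dimensional sequence should work
data = np.random.normal(0, 1, 1000).cumsum()

# Local, sequential run with a reduced number of experiments
best = execute(data, experiments=5, distributed=False)

# Distributed run over dispy nodes (assumes dispynode is running on them):
# best = execute(data, experiments=30, distributed='dispy',
#                nodes=['192.168.0.10', '192.168.0.11'])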
Example #2
# Assumed imports, matching the calls below; cluster_method and
# process_jobs are assumed to be defined in the enclosing module.
from pyFTS.hyperparam import Util as hUtil
from pyFTS.distributed import dispy as Util


def execute(datasetname, dataset, **kwargs):
    nodes = kwargs.get('nodes', ['127.0.0.1'])

    # Start a dispy cluster that runs cluster_method on the worker nodes
    cluster, http_server = Util.start_dispy_cluster(cluster_method,
                                                    nodes=nodes)
    conn = hUtil.open_hyperparam_db('hyperparam.db')

    # Genetic algorithm settings: generations, population size,
    # crossover and mutation probabilities
    ngen = kwargs.get('ngen', 70)
    npop = kwargs.get('npop', 20)
    pcruz = kwargs.get('pcruz', .8)
    pmut = kwargs.get('pmut', .2)
    option = kwargs.get('option', 1)

    jobs = []

    # Submit one job per experiment and process the results afterwards
    for i in range(kwargs.get('experiments', 30)):
        print("Experiment {}".format(i))
        job = cluster.submit(dataset, ngen, npop, pcruz, pmut, option)
        jobs.append(job)

    process_jobs(jobs, datasetname, conn)

    # Shut down the cluster and its monitoring HTTP server
    Util.stop_dispy_cluster(cluster, http_server)
Example #3
import time

import numpy as np

from pyFTS.hyperparam import Util as hUtil
from pyFTS.distributed import dispy as dUtil
from pyFTS.models import hofts

# evaluate, GeneticAlgorithm and process_experiment are assumed to be
# defined in the enclosing module.


def execute(datasetname, dataset, **kwargs):
    """
    Batch execution of Distributed Evolutionary Hyperparameter Optimization (DEHO) for monovariate methods

    :param datasetname: the name of the dataset, used to tag the results in the database
    :param dataset: the time series to optimize the FTS
    :keyword database_file: the database file where the results are persisted, default value: 'hyperparam.db'
    :keyword experiments: An integer value with the number of experiments to run, default value: 30
    :keyword ngen: An integer value with the maximum number of generations, default value: 30
    :keyword mgen: An integer value with the maximum number of generations without improvement to stop, default value: 7
    :keyword npop: An integer value with the population size, default value: 20
    :keyword pcross: A float value between 0 and 1 with the probability of crossover, default value: .5
    :keyword psel: A float value between 0 and 1 with the probability of selection, default value: .5
    :keyword pmut: A float value between 0 and 1 with the probability of mutation, default value: .3
    :keyword fts_method: The FTS method to optimize
    :keyword parameters: dict with model-specific arguments for fts_method
    :keyword elitism: A boolean value indicating if the best individual must always survive to the next population
    :keyword initial_operator: a function that receives npop and returns a random population of size npop
    :keyword random_individual: a function that creates a random genotype
    :keyword evaluation_operator: a function that receives a dataset and an individual and returns its fitness
    :keyword selection_operator: a function that receives the whole population and returns a selected individual
    :keyword crossover_operator: a function that receives the whole population and returns a descendant individual
    :keyword mutation_operator: a function that receives one individual and returns a changed individual
    :keyword window_size: An integer value with the length of the scrolling window for train/test on the dataset
    :keyword train_rate: A float value between 0 and 1 with the train/test split ([0,1])
    :keyword increment_rate: A float value between 0 and 1 with the increment of the scrolling window,
             relative to the window_size ([0,1])
    :keyword collect_statistics: A boolean value indicating whether to collect statistics for each generation
    :keyword distributed: A value indicating whether the execution will be local and sequential (distributed=False),
             or parallel and distributed (distributed='dispy' or distributed='spark')
    :keyword cluster: If distributed='dispy', the list of cluster nodes; if distributed='spark', the master node
    :return: the best genotype
    """

    file = kwargs.get('database_file', 'hyperparam.db')

    # Open (or create) the database where the experiment results are stored
    conn = hUtil.open_hyperparam_db(file)

    experiments = kwargs.get('experiments', 30)

    distributed = kwargs.get('distributed', False)

    fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
    # Short method name (the last component of the module path), used to
    # tag the results in the database
    shortname = str(fts_method.__module__).split('.')[-1]

    if distributed == 'dispy':
        # Start a dispy cluster that will run the fitness evaluations
        nodes = kwargs.get('nodes', ['127.0.0.1'])
        cluster, http_server = dUtil.start_dispy_cluster(evaluate, nodes=nodes)
        kwargs['cluster'] = cluster

    ret = []
    for i in np.arange(experiments):
        print("Experiment {}".format(i))

        # Run one complete evolution and measure its wall-clock time
        start = time.time()
        ret, statistics = GeneticAlgorithm(dataset, **kwargs)
        end = time.time()
        ret['time'] = end - start
        experiment = {'individual': ret, 'statistics': statistics}

        # Persist the experiment results in the database
        ret = process_experiment(shortname, experiment, datasetname, conn)

    if distributed == 'dispy':
        # Shut down the cluster and its monitoring HTTP server
        dUtil.stop_dispy_cluster(cluster, http_server)

    return ret
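
A hedged usage sketch for the documented signature above, exercising the keywords from the docstring; the dataset, the dataset name and the window settings are illustrative assumptions.

import numpy as np
from pyFTS.models import hofts

data = np.random.normal(0, 1, 1000).cumsum()

best = execute('synthetic', data,
               ngen=30, mgen=7, npop=20,
               pcross=.5, psel=.5, pmut=.3,
               fts_method=hofts.WeightedHighOrderFTS,
               window_size=400, train_rate=.7, increment_rate=.2,
               experiments=5, distributed=False)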
Example #4
import numpy as np
from itertools import product

from pyFTS.hyperparam import Util as hUtil
from pyFTS.distributed import dispy as dUtil

# dict_individual, cluster_method and process_jobs are assumed to be
# defined in the enclosing module.


def execute(hyperparams, datasetname, dataset, **kwargs):

    nodes = kwargs.get('nodes', ['127.0.0.1'])

    individuals = []

    # The lags are combined separately from the other hyperparameters
    if 'lags' in hyperparams:
        lags = hyperparams.pop('lags')
    else:
        lags = [k for k in np.arange(50)]

    keys_sorted = [k for k in sorted(hyperparams.keys())]

    # Map each hyperparameter name to its position in the product tuples
    index = {}
    for k in np.arange(len(keys_sorted)):
        index[keys_sorted[k]] = k

    print("Evaluation order: \n {}".format(index))

    hp_values = [[v for v in hyperparams[hp]] for hp in keys_sorted]

    print("Evaluation values: \n {}".format(hp_values))

    # Start a dispy cluster that runs cluster_method on the worker nodes
    cluster, http_server = dUtil.start_dispy_cluster(cluster_method,
                                                     nodes=nodes)
    conn = hUtil.open_hyperparam_db('hyperparam.db')

    # Cartesian product of all hyperparameter values
    for instance in product(*hp_values):
        partitions = instance[index['partitions']]
        partitioner = instance[index['partitioner']]
        mf = instance[index['mf']]
        alpha_cut = instance[index['alpha']]
        order = instance[index['order']]
        count = 0
        for lag1 in lags:  # first lag
            _lags = [lag1]
            count += 1
            if order > 1:
                for lag2 in lags:  # second lag, relative to the first
                    _lags2 = [lag1, lag1 + lag2]
                    count += 1
                    if order > 2:
                        for lag3 in lags:  # third lag, relative to the second
                            count += 1
                            _lags3 = [lag1, lag1 + lag2, lag1 + lag2 + lag3]
                            individuals.append(
                                dict_individual(mf, partitioner, partitions,
                                                order, _lags3, alpha_cut))
                    else:
                        individuals.append(
                            dict_individual(mf, partitioner, partitions, order,
                                            _lags2, alpha_cut))
            else:
                individuals.append(
                    dict_individual(mf, partitioner, partitions, order, _lags,
                                    alpha_cut))

            # Submit the accumulated individuals in batches
            if count > 10:
                jobs = []

                for ind in individuals:
                    print("Testing individual {}".format(ind))
                    job = cluster.submit(ind, dataset, **kwargs)
                    jobs.append(job)

                process_jobs(jobs, datasetname, conn)

                count = 0

                individuals = []

    # Flush any remaining individuals that did not fill a complete batch;
    # without this, the last partial batch would never be evaluated
    if len(individuals) > 0:
        jobs = []
        for ind in individuals:
            job = cluster.submit(ind, dataset, **kwargs)
            jobs.append(job)
        process_jobs(jobs, datasetname, conn)

    # Shut down the cluster and its monitoring HTTP server
    dUtil.stop_dispy_cluster(cluster, http_server)
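
A sketch of the hyperparams search space this function expects, inferred from the index keys it reads ('partitions', 'partitioner', 'mf', 'alpha', 'order') plus the optional 'lags' entry; the integer codes and value ranges below are illustrative assumptions.

import numpy as np

data = np.random.normal(0, 1, 1000).cumsum()

hyperparams = {
    'order': [1, 2, 3],
    'partitions': np.arange(10, 100, 10),
    'partitioner': [1],               # illustrative code, e.g. 1 = Grid partitioner
    'mf': [1, 2, 3, 4],               # illustrative membership function codes
    'alpha': np.arange(.0, .5, .05),
    'lags': np.arange(1, 10),
}

execute(hyperparams, 'synthetic', data, nodes=['127.0.0.1'])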