Example #1
def log_result(conn, datasetname, fts_method, result):
    metrics = ['rmse', 'size', 'time']
    for metric in metrics:
        record = (datasetname, 'Evolutive', fts_method, None, result['mf'],
                  result['order'], result['partitioner'], result['npart'],
                  result['alpha'], str(result['lags']), metric, result[metric])

        print(record)

        hUtil.insert_hyperparam(record, conn)
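A minimal usage sketch for the function above, assuming the surrounding module provides hUtil (as in the other examples). The keys of the result dictionary are inferred from the record tuple, and all concrete values are placeholders.

conn = hUtil.open_hyperparam_db('hyperparam.db')
# Keys inferred from the record tuple: hyperparameters plus the logged metrics.
result = {'mf': 'trimf', 'order': 2, 'partitioner': 'grid', 'npart': 35,
          'alpha': 0.0, 'lags': [1, 2],
          'rmse': 1.25, 'size': 40, 'time': 3.7}
log_result(conn, 'TAIEX', 'WHOFTS', result)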
Example #2
def execute(dataset, **kwargs):
    file = kwargs.get('file', 'hyperparam.db')

    conn = hUtil.open_hyperparam_db(file)

    experiments = kwargs.get('experiments', 30)

    distributed = kwargs.get('distributed', False)

    if distributed == 'dispy':
        import dispy
        from pyFTS.distributed import dispy as dUtil
        nodes = kwargs.get('nodes', ['127.0.0.1'])
        cluster, http_server = dUtil.start_dispy_cluster(evaluate, nodes=nodes)
        kwargs['cluster'] = cluster

    ret = []
    for i in np.arange(experiments):
        print("Experiment {}".format(i))

        start = time.time()
        ret, statistics = GeneticAlgorithm(dataset, **kwargs)
        end = time.time()
        ret['time'] = end - start
        experiment = {'individual': ret, 'statistics': statistics}

        ret = process_experiment(experiment, '', conn)

    if distributed == 'dispy':
        dUtil.stop_dispy_cluster(cluster, http_server)

    return ret
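A hedged sketch of how this execute variant might be driven locally, assuming the module-level imports the function relies on (np, time, hUtil, GeneticAlgorithm) are in place. The dataset loader and keyword values are illustrative placeholders.

from pyFTS.data import TAIEX  # any univariate series would do
data = TAIEX.get_data()
# Sequential run: each experiment's best individual and statistics are
# persisted to 'hyperparam.db' by process_experiment.
best = execute(data, file='hyperparam.db', experiments=5,
               ngen=10, npop=10, distributed=False)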
Example #3
def process_jobs(jobs, datasetname, conn):
    for job in jobs:
        result, rmse, size, mape, u = job()
        if job.status == dispy.DispyJob.Finished and result is not None:
            print("Processing result of {}".format(result))
            
            metrics = {'rmse': rmse, 'size': size, 'mape': mape, 'u': u }
            
            for metric in metrics.keys():

                record = (datasetname, 'GridSearch', 'WHOFTS', None, result['mf'],
                          result['order'], result['partitioner'], result['npart'],
                          result['alpha'], str(result['lags']), metric, metrics[metric])
                          
                print(record)

                hUtil.insert_hyperparam(record, conn)

        else:
            print(job.exception)
            print(job.stdout)
Example #4
def process_jobs(jobs, datasetname, conn):
    for job in jobs:
        result = job()
        if job.status == dispy.DispyJob.Finished and result is not None:
            print("Processing result of {}".format(result))

            metrics = ['rmse', 'size', 'time']

            for metric in metrics:
                record = (datasetname, 'Evolutive', 'WHOFTS', None,
                          result['mf'], result['order'], result['partitioner'],
                          result['npart'], result['alpha'],
                          str(result['lags']), metric, result[metric])

                print(record)

                hUtil.insert_hyperparam(record, conn)

        else:
            print(job.exception)
            print(job.stdout)
Example #5
def process_jobs(jobs, datasetname, conn):
    for ct, job in enumerate(jobs):
        print("Processing job {}".format(ct))
        result = job()
        if job.status == dispy.DispyJob.Finished and result is not None:
            print("Processing result of {}".format(result))

            metrics = {'rmse': result['rmse'], 'size': result['size']}

            for metric in metrics.keys():

                param = result['parameters']

                record = (datasetname, 'GridSearch', 'WHOFTS', None,
                          param['mf'], param['order'],
                          param['partitioner'], param['npart'], param['alpha'],
                          str(param['lags']), metric, metrics[metric])

                hUtil.insert_hyperparam(record, conn)

        else:
            print(job.exception)
            print(job.stdout)
Example #6
def execute(datasetname, dataset, **kwargs):
    nodes = kwargs.get('nodes', ['127.0.0.1'])

    cluster, http_server = Util.start_dispy_cluster(cluster_method,
                                                    nodes=nodes)
    conn = hUtil.open_hyperparam_db('hyperparam.db')

    ngen = kwargs.get('ngen', 70)
    npop = kwargs.get('npop', 20)
    pcruz = kwargs.get('pcruz', .8)
    pmut = kwargs.get('pmut', .2)
    option = kwargs.get('option', 1)

    jobs = []

    for i in range(kwargs.get('experiments', 30)):
        print("Experiment {}".format(i))
        job = cluster.submit(dataset, ngen, npop, pcruz, pmut, option)
        jobs.append(job)

    process_jobs(jobs, datasetname, conn)

    Util.stop_dispy_cluster(cluster, http_server)
Example #7
def execute(datasetname, dataset, **kwargs):
    """
    Batch execution of Distributed Evolutionary Hyperparameter Optimization (DEHO) for monovariate methods

    :param datasetname: A string with the name of the dataset, used to tag the results stored in the database
    :param dataset: The time series to optimize the FTS
    :keyword database_file: The path of the SQLite database where results are stored, default value: 'hyperparam.db'
    :keyword experiments: An integer value with the number of experiment repetitions, default value: 30
    :keyword ngen: An integer value with the maximum number of generations, default value: 30
    :keyword mgen: An integer value with the maximum number of generations without improvement to stop, default value: 7
    :keyword npop: An integer value with the population size, default value: 20
    :keyword pcross: A float value between 0 and 1 with the probability of crossover, default: .5
    :keyword psel: A float value between 0 and 1 with the probability of selection, default: .5
    :keyword pmut: A float value between 0 and 1 with the probability of mutation, default: .3
    :keyword fts_method: The FTS method to optimize
    :keyword parameters: dict with model-specific arguments for fts_method
    :keyword elitism: A boolean value indicating if the best individual must always survive to the next population
    :keyword initial_operator: a function that receives npop and returns a random population of size npop
    :keyword random_individual: a function that creates a random genotype
    :keyword evalutation_operator: a function that receives a dataset and an individual and returns its fitness
    :keyword selection_operator: a function that receives the whole population and returns a selected individual
    :keyword crossover_operator: a function that receives the whole population and returns a descendant individual
    :keyword mutation_operator: a function that receives one individual and returns a changed individual
    :keyword window_size: An integer value with the length of the scrolling window used for train/test on the dataset
    :keyword train_rate: A float value between 0 and 1 with the train/test split ([0,1])
    :keyword increment_rate: A float value between 0 and 1 with the increment of the scrolling window,
             relative to the window_size ([0,1])
    :keyword collect_statistics: A boolean value indicating whether to collect statistics for each generation
    :keyword distributed: A value indicating if the execution will be local and sequential (distributed=False),
             or parallel and distributed (distributed='dispy' or distributed='spark')
    :keyword cluster: If distributed='dispy', the list of cluster nodes; if distributed='spark', the master node
    :return: the best genotype
    """

    file = kwargs.get('database_file', 'hyperparam.db')

    conn = hUtil.open_hyperparam_db(file)

    experiments = kwargs.get('experiments', 30)

    distributed = kwargs.get('distributed', False)

    fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
    shortname = str(fts_method.__module__).split('.')[-1]

    if distributed == 'dispy':
        nodes = kwargs.get('nodes', ['127.0.0.1'])
        cluster, http_server = dUtil.start_dispy_cluster(evaluate, nodes=nodes)
        kwargs['cluster'] = cluster

    ret = []
    for i in np.arange(experiments):
        print("Experiment {}".format(i))

        start = time.time()
        ret, statistics = GeneticAlgorithm(dataset, **kwargs)
        end = time.time()
        ret['time'] = end - start
        experiment = {'individual': ret, 'statistics': statistics}

        ret = process_experiment(shortname, experiment, datasetname, conn)

    if distributed == 'dispy':
        dUtil.stop_dispy_cluster(cluster, http_server)

    return ret
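A hedged usage sketch built from the docstring above; the series, node address, and keyword values are placeholders rather than recommended settings.

from pyFTS.data import TAIEX
data = TAIEX.get_data()
# Distributed DEHO run over dispy; replace the node address with real workers.
best = execute('TAIEX', data,
               database_file='hyperparam.db',
               experiments=3,
               ngen=30, mgen=7, npop=20,
               pcross=.5, pmut=.3,
               window_size=800, train_rate=.7, increment_rate=.2,
               distributed='dispy', nodes=['127.0.0.1'])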
Example #8
def execute(hyperparams, datasetname, dataset, **kwargs):

    nodes = kwargs.get('nodes', ['127.0.0.1'])

    individuals = []

    if 'lags' in hyperparams:
        lags = hyperparams.pop('lags')
    else:
        lags = [k for k in np.arange(50)]

    keys_sorted = [k for k in sorted(hyperparams.keys())]

    index = {}
    for k in np.arange(len(keys_sorted)):
        index[keys_sorted[k]] = k

    print("Evaluation order: \n {}".format(index))

    hp_values = [[v for v in hyperparams[hp]] for hp in keys_sorted]

    print("Evaluation values: \n {}".format(hp_values))

    cluster, http_server = dUtil.start_dispy_cluster(cluster_method,
                                                     nodes=nodes)
    conn = hUtil.open_hyperparam_db('hyperparam.db')

    for instance in product(*hp_values):
        partitions = instance[index['partitions']]
        partitioner = instance[index['partitioner']]
        mf = instance[index['mf']]
        alpha_cut = instance[index['alpha']]
        order = instance[index['order']]
        count = 0
        for lag1 in lags:  # first lag
            _lags = [lag1]
            count += 1
            if order > 1:
                for lag2 in lags:  # second lag, offset from lag1
                    _lags2 = [lag1, lag1 + lag2]
                    count += 1
                    if order > 2:
                        for lag3 in lags:  # third lag, offset from lag1 + lag2
                            count += 1
                            _lags3 = [lag1, lag1 + lag2, lag1 + lag2 + lag3]
                            individuals.append(
                                dict_individual(mf, partitioner, partitions,
                                                order, _lags3, alpha_cut))
                    else:
                        individuals.append(
                            dict_individual(mf, partitioner, partitions, order,
                                            _lags2, alpha_cut))
            else:
                individuals.append(
                    dict_individual(mf, partitioner, partitions, order, _lags,
                                    alpha_cut))

            if count > 10:
                jobs = []

                for ind in individuals:
                    print("Testing individual {}".format(ind))
                    job = cluster.submit(ind, dataset, **kwargs)
                    jobs.append(job)

                process_jobs(jobs, datasetname, conn)

                count = 0

                individuals = []

    dUtil.stop_dispy_cluster(cluster, http_server)
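A hedged sketch of how this grid-search driver might be called. The hyperparameter keys ('mf', 'partitioner', 'partitions', 'alpha', 'order', 'lags') are the ones read inside the function; the candidate values and their integer encodings are assumptions for illustration only.

from pyFTS.data import TAIEX
data = TAIEX.get_data()
hyperparams = {
    'mf': [1, 2, 3, 4],           # membership function codes (assumed encoding)
    'partitioner': [1, 2],        # partitioning scheme codes (assumed encoding)
    'partitions': [10, 30, 50],
    'alpha': [.0, .1, .2],
    'order': [1, 2, 3],
    'lags': [1, 2, 3]             # optional; defaults to range(50) when omitted
}
execute(hyperparams, 'TAIEX', data, nodes=['127.0.0.1'])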