import time

import numpy as np

from pyFTS.hyperparam import Util as hUtil

# GeneticAlgorithm, process_experiment and evaluate are assumed to be defined
# elsewhere in this module.


def execute(dataset, **kwargs):
    file = kwargs.get('file', 'hyperparam.db')
    conn = hUtil.open_hyperparam_db(file)

    experiments = kwargs.get('experiments', 30)
    distributed = kwargs.get('distributed', False)

    if distributed == 'dispy':
        import dispy
        from pyFTS.distributed import dispy as dUtil
        nodes = kwargs.get('nodes', ['127.0.0.1'])
        cluster, http_server = dUtil.start_dispy_cluster(evaluate, nodes=nodes)
        kwargs['cluster'] = cluster

    ret = []
    for i in np.arange(experiments):
        print("Experiment {}".format(i))

        # run one full GA search and time it
        start = time.time()
        ret, statistics = GeneticAlgorithm(dataset, **kwargs)
        end = time.time()
        ret['time'] = end - start

        experiment = {'individual': ret, 'statistics': statistics}
        # 'ret' ends up holding the processed result of the last experiment
        ret = process_experiment(experiment, '', conn)

    if distributed == 'dispy':
        dUtil.stop_dispy_cluster(cluster, http_server)

    return ret
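# A minimal usage sketch for the sequential variant above, assuming this module
# is importable with its GA defaults. The synthetic random-walk series is
# illustrative only.
#
#   import numpy as np
#   data = np.cumsum(np.random.normal(size=2000))
#   best = execute(data, experiments=5, distributed=False)
#   print(best)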
def execute(datasetname, dataset, **kwargs):
    nodes = kwargs.get('nodes', ['127.0.0.1'])
    cluster, http_server = Util.start_dispy_cluster(cluster_method, nodes=nodes)

    conn = hUtil.open_hyperparam_db('hyperparam.db')

    # genetic algorithm hyperparameters
    ngen = kwargs.get('ngen', 70)     # number of generations
    npop = kwargs.get('npop', 20)     # population size
    pcruz = kwargs.get('pcruz', .8)   # crossover probability
    pmut = kwargs.get('pmut', .2)     # mutation probability
    option = kwargs.get('option', 1)

    jobs = []
    for i in range(kwargs.get('experiments', 30)):
        print("Experiment {}".format(i))
        job = cluster.submit(dataset, ngen, npop, pcruz, pmut, option)
        jobs.append(job)

    process_jobs(jobs, datasetname, conn)

    Util.stop_dispy_cluster(cluster, http_server)
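# A hypothetical call for the dispy-distributed variant above; the dataset name
# and node address are placeholders.
#
#   execute('TAIEX', dataset, nodes=['192.168.0.110'],
#           ngen=50, npop=30, experiments=10)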
from pyFTS.models import hofts


def execute(datasetname, dataset, **kwargs):
    """
    Batch execution of Distributed Evolutionary Hyperparameter Optimization (DEHO) for monovariate methods

    :param datasetname: the dataset name, used to identify the experiments in the database
    :param dataset: The time series to optimize the FTS
    :keyword database_file: the SQLite database file where the experiments are stored, default: 'hyperparam.db'
    :keyword experiments: An integer value with the number of independent executions, default: 30
    :keyword ngen: An integer value with the maximum number of generations, default value: 30
    :keyword mgen: An integer value with the maximum number of generations without improvement to stop, default value: 7
    :keyword npop: An integer value with the population size, default value: 20
    :keyword pcross: A float value between 0 and 1 with the probability of crossover, default: .5
    :keyword psel: A float value between 0 and 1 with the probability of selection, default: .5
    :keyword pmut: A float value between 0 and 1 with the probability of mutation, default: .3
    :keyword fts_method: The FTS method to optimize
    :keyword parameters: dict with model-specific arguments for fts_method
    :keyword elitism: A boolean value indicating if the best individual must always survive to the next population
    :keyword initial_operator: a function that receives npop and returns a random population of size npop
    :keyword random_individual: a function that creates a random genotype
    :keyword evaluation_operator: a function that receives a dataset and an individual and returns its fitness
    :keyword selection_operator: a function that receives the whole population and returns a selected individual
    :keyword crossover_operator: a function that receives the whole population and returns a descendent individual
    :keyword mutation_operator: a function that receives one individual and returns a changed individual
    :keyword window_size: An integer value with the length of the scrolling window for train/test on the dataset
    :keyword train_rate: A float value between 0 and 1 with the train/test split ([0,1])
    :keyword increment_rate: A float value between 0 and 1 with the increment of the scrolling window, relative to window_size ([0,1])
    :keyword collect_statistics: A boolean value indicating whether to collect statistics for each generation
    :keyword distributed: A value indicating if the execution will be local and sequential (distributed=False), or parallel and distributed (distributed='dispy' or distributed='spark')
    :keyword cluster: If distributed='dispy', the list of cluster nodes; if distributed='spark', the master node
    :return: the best genotype
    """

    file = kwargs.get('database_file', 'hyperparam.db')
    conn = hUtil.open_hyperparam_db(file)

    experiments = kwargs.get('experiments', 30)
    distributed = kwargs.get('distributed', False)

    fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
    shortname = str(fts_method.__module__).split('.')[-1]

    if distributed == 'dispy':
        from pyFTS.distributed import dispy as dUtil
        nodes = kwargs.get('nodes', ['127.0.0.1'])
        cluster, http_server = dUtil.start_dispy_cluster(evaluate, nodes=nodes)
        kwargs['cluster'] = cluster

    ret = []
    for i in np.arange(experiments):
        print("Experiment {}".format(i))

        start = time.time()
        ret, statistics = GeneticAlgorithm(dataset, **kwargs)
        end = time.time()
        ret['time'] = end - start

        experiment = {'individual': ret, 'statistics': statistics}
        ret = process_experiment(shortname, experiment, datasetname, conn)

    if distributed == 'dispy':
        dUtil.stop_dispy_cluster(cluster, http_server)

    return ret
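# A usage sketch for the DEHO driver above, built from the keywords documented
# in its docstring. The dataset loader (pyFTS.data.TAIEX) is real; the node
# address and the concrete parameter values are assumptions.
#
#   from pyFTS.data import TAIEX
#   from pyFTS.models import hofts
#
#   data = TAIEX.get_data()
#   best = execute('TAIEX', data,
#                  ngen=30, npop=20, psel=.5, pcross=.5, pmut=.3,
#                  fts_method=hofts.WeightedHighOrderFTS,
#                  window_size=10000, train_rate=.9, increment_rate=.3,
#                  experiments=1,
#                  distributed='dispy', nodes=['192.168.0.110'])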
from itertools import product

from pyFTS.distributed import dispy as dUtil

# cluster_method, dict_individual and process_jobs are assumed to be defined
# elsewhere in this module.


def execute(hyperparams, datasetname, dataset, **kwargs):
    nodes = kwargs.get('nodes', ['127.0.0.1'])

    individuals = []

    # candidate lags can be given explicitly; otherwise test the first 50
    if 'lags' in hyperparams:
        lags = hyperparams.pop('lags')
    else:
        lags = [k for k in np.arange(50)]

    keys_sorted = [k for k in sorted(hyperparams.keys())]

    # map each hyperparameter name to its position inside a grid instance
    index = {}
    for k in np.arange(len(keys_sorted)):
        index[keys_sorted[k]] = k

    print("Evaluation order: \n {}".format(index))

    hp_values = [[v for v in hyperparams[hp]] for hp in keys_sorted]

    print("Evaluation values: \n {}".format(hp_values))

    cluster, http_server = dUtil.start_dispy_cluster(cluster_method, nodes=nodes)
    conn = hUtil.open_hyperparam_db('hyperparam.db')

    # Cartesian product of all hyperparameter values
    for instance in product(*hp_values):
        partitions = instance[index['partitions']]
        partitioner = instance[index['partitioner']]
        mf = instance[index['mf']]
        alpha_cut = instance[index['alpha']]
        order = instance[index['order']]
        count = 0
        for lag1 in lags:  # first lag
            _lags = [lag1]
            count += 1
            if order > 1:
                for lag2 in lags:  # increment between first and second lags
                    _lags2 = [lag1, lag1 + lag2]
                    count += 1
                    if order > 2:
                        for lag3 in lags:  # increment between second and third lags
                            count += 1
                            _lags3 = [lag1, lag1 + lag2, lag1 + lag2 + lag3]
                            individuals.append(
                                dict_individual(mf, partitioner, partitions, order, _lags3, alpha_cut))
                    else:
                        individuals.append(
                            dict_individual(mf, partitioner, partitions, order, _lags2, alpha_cut))
            else:
                individuals.append(
                    dict_individual(mf, partitioner, partitions, order, _lags, alpha_cut))

            # dispatch evaluations to the dispy cluster in small batches
            if count > 10:
                jobs = []
                for ind in individuals:
                    print("Testing individual {}".format(ind))
                    job = cluster.submit(ind, dataset, **kwargs)
                    jobs.append(job)

                process_jobs(jobs, datasetname, conn)

                count = 0
                individuals = []

    dUtil.stop_dispy_cluster(cluster, http_server)
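# A usage sketch for the grid-search driver above. The grid keys match what the
# function reads ('partitions', 'partitioner', 'mf', 'alpha', 'order', plus the
# optional 'lags'); the concrete values, the integer codes for partitioner/mf,
# and the node address are assumptions.
#
#   hyperparams = {
#       'order': [1, 2, 3],
#       'partitions': np.arange(10, 100, 10),
#       'partitioner': [1],             # e.g. 1 = grid partitioner
#       'mf': [1, 2, 3],                # membership function codes
#       'lags': np.arange(1, 10),
#       'alpha': np.arange(0, .5, .05),
#   }
#   execute(hyperparams, 'TAIEX', dataset, nodes=['192.168.0.110'])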