def run(algorithm, model):
    """Run the chosen search algorithm over ``model`` and report its result.

    Parameters
    ----------
    algorithm : str
        Either "DE" or "MaxWalkSat".
    model : object
        Model instance handed to the optimizer's constructor.

    Raises
    ------
    ValueError
        For an unrecognized ``algorithm`` name.  The original code silently
        fell through and then crashed with a NameError on ``algo``.
    """
    if algorithm == "DE":
        algo = DE(model)
    elif algorithm == "MaxWalkSat":
        algo = MaxWalkSat(model)
    else:
        raise ValueError("Unknown algorithm: " + str(algorithm))
    # Parenthesized single-argument print is valid in both Python 2 and 3.
    print("Best State " + str(algo.run()))
    print("Total Evaluations: " + str(algo.evals))
def call_de(i, x, training_data, testing_data, fold_indexes, goal="Max", term="Early"):
    """Tune learner ``i`` on measure ``x`` with DE, then compare tuned vs default.

    Parameters
    ----------
    i : key into the module-level ``param_grid``.
    x : str
        Name of the evaluation measure.
    training_data, testing_data : pandas.DataFrame
        Last column is assumed to be the label — TODO confirm against callers.
    fold_indexes : pair of positional index arrays (train fold, tune fold).
    goal, term : DE direction and termination mode, forwarded to ``DE``.

    Returns
    -------
    (val_tune, params) : tuned-measure value and the winning parameter values.
    """
    # BUG FIX: DataFrame.ix was deprecated and removed from pandas; the fold
    # indexes come from a CV splitter and are positional, so .iloc is the
    # correct replacement.
    train_data = training_data.iloc[fold_indexes[0]].values
    tune_data = training_data.iloc[fold_indexes[1]].values
    test_data = testing_data.values
    de = DE(GEN=2, Goal=goal, termination=term)
    v, pareto = de.solve(process, OrderedDict(param_grid[i]['learners_para_dic']),
                         param_grid[i]['learners_para_bounds'],
                         param_grid[i]['learners_para_categories'],
                         param_grid[i]['model'], x, [train_data, tune_data])
    params = v.ind.values()
    training_data = training_data.values
    # Train once with tuned parameters and once with the model's defaults.
    predicted_tune = param_grid[i]['model'](training_data[:, :-1],
                                            training_data[:, -1],
                                            test_data[:, :-1], params)
    predicted_default = param_grid[i]['model'](training_data[:, :-1],
                                               training_data[:, -1],
                                               test_data[:, :-1], None)
    val_tune = evaluation(x, predicted_tune, test_data[:, -1], test_data[:, :-1])
    val_predicted = evaluation(x, predicted_default, test_data[:, -1], test_data[:, :-1])
    print("For measure %s: default=%s, predicted=%s" % (x, val_predicted, val_tune))
    return val_tune, params
def run_pop_algos_for_vid():
    "run the pop algos for proj4 vid"
    # Small sample only — accuracy is irrelevant for the demo video.
    data = Data('abalone', pd.read_csv(r'data/abalone.data', header=None), 8, False)
    sample_frame = data.df.sample(100)
    data.split_data(data_frame=sample_frame)

    def _report(best, vector_label, results_label):
        # Shared reporting for one algorithm run: best vector + test results.
        print(vector_label)
        print(best.net_vector)
        client = NetworkClient(data)
        network = NeuralNetwork(data)
        trained_net = network.GADEnet(best.layers, best.net_vector)
        print(results_label)
        print(client.testing(trained_net, best.outputs, best.network))

    gen_algo = GA(1000, 4, data, max_runs=1000, mutation_rate=1)
    print("----------------------- RUNNING THE GA -----------------")
    _report(gen_algo.run_GA(),
            "Best fitting vector From the GA",
            "Printing testing results from the GA")
    print("----------------------- GA DONE -----------------")
    for _ in range(3):
        print("---------------------------------------------------")
    print("----------------------- RUNNING DE -----------------")
    de_algo = DE(10, .7, 2, 4, data, max_runs=100, mutation_rate=.03)
    _report(de_algo.run_DE(),
            "Best fitting vector from DE",
            "Printing testing results from DE")
    print("----------------------- DE DONE -----------------")
def call_de(i, x, training_data, testing_data, fold, goal="Max", eval_time=3600, lifes=5):
    """Tune learner ``i`` on measure ``x`` with DE and compare tuned vs default.

    Returns the tuned measure value, the winning parameters, the number of
    DE evaluations, and the tuning wall time.
    """
    de = DE(NP=10, Goal=goal, eval_time=eval_time, num_lifes=lifes)
    best, num_evals, tuning_time = de.solve(
        process, OrderedDict(param_grid[i]['learners_para_dic']),
        param_grid[i]['learners_para_bounds'],
        param_grid[i]['learners_para_categories'],
        param_grid[i]['model'], x, training_data, fold)
    params = best.ind.values()
    model = param_grid[i]['model']
    # Last column is the label; everything before it is the feature matrix.
    train_x, train_y = training_data.iloc[:, :-1], training_data.iloc[:, -1]
    test_x, test_y = testing_data.iloc[:, :-1], testing_data.iloc[:, -1]
    # Score once with the tuned parameters and once with the model defaults.
    predicted_tune = model(train_x, train_y, test_x, params)
    predicted_default = model(train_x, train_y, test_x, None)
    val_tune = evaluation(x, predicted_tune, test_y, test_x)
    val_predicted = evaluation(x, predicted_default, test_y, test_x)
    print("For measure %s: default=%s, predicted=%s" % (x, val_predicted, val_tune))
    return val_tune, params, num_evals, tuning_time
def _test(res=''):
    """LDADE pipeline: tune LDA with DE, then evaluate all learners over
    5 shuffles x 5 stratified folds, pickling per-learner metrics/times.

    Parameters
    ----------
    res : str
        Data set name; resolves to ``<ROOT>/../data/preprocessed/<res>.txt``.
    """
    seed(1)
    np.random.seed(1)
    path = ROOT + "/../data/preprocessed/" + res + ".txt"
    raw_data, labels = readfile1(path)
    temp = {}
    for i in range(5):
        # list() so shuffle() also works on Python 3, where range is lazy.
        ranges = list(range(len(labels)))
        shuffle(ranges)
        raw_data = raw_data[ranges]
        labels = labels[ranges]
        start_time = time.time()
        de = DE(Goal="Max", GEN=3, NP=10, termination="Early")
        v, _ = de.solve(learners[0], OrderedDict(learners_para_dic[0]),
                        learners_para_bounds[0], learners_para_categories[0],
                        file=res, term=7, data_samples=raw_data)
        corpus, _ = LDA_(raw_data, **v.ind)
        # Tuning time is charged to every learner evaluated below.
        end_time = time.time() - start_time
        skf = StratifiedKFold(n_splits=5)
        for train_index, test_index in skf.split(corpus, labels):
            train_data, train_labels = corpus[train_index], labels[train_index]
            test_data, test_labels = corpus[test_index], labels[test_index]
            for j, le in enumerate(MLS):
                if le.__name__ not in temp:
                    temp[le.__name__] = {}
                start_time1 = time.time()
                _, val = MLS[j](MLS_para_dic[j], train_data, train_labels,
                                test_data, test_labels, 'Dist2Heaven')
                end_time1 = time.time() - start_time1
                for m in metrics:
                    temp[le.__name__].setdefault(m, []).append(val[0][m])
                # BUG FIX: the original "if missing: init  else: append"
                # pattern dropped the FIRST 'times' and 'features' sample
                # for every learner; setdefault always appends.
                temp[le.__name__].setdefault('times', []).append(end_time1 + end_time)
                temp[le.__name__].setdefault('features', []).append(val[1])
    print(temp)
    with open('../dump/LDADE_FFT_' + res + '.pickle', 'wb') as handle:
        pickle.dump(temp, handle)
def test_de(self):
    """Smoke-test DE training of a network on a small abalone sample."""
    data = Data('abalone', pd.read_csv(r'data/abalone.data', header=None), 8, False)
    sample_frame = data.df.sample(100)
    data.split_data(data_frame=sample_frame)
    best = DE(50, .7, 2, 4, data, max_runs=100, mutation_rate=.03).run_DE()
    print("Best fitting vector")
    print(best.net_vector)
    client = NetworkClient(data)
    trained_net = NeuralNetwork(data).GADEnet(best.layers, best.net_vector)
    print("Printing testing results")
    print(client.testing(trained_net, best.outputs, best.network))
def getData(self):
    """Return the SMOTER-resampled data set built with DE-tuned parameters.

    Returns
    -------
    tuple
        ``(resampled_data, bestPars)`` where ``bestPars`` is the best
        DE parameter row ``[k, m, r]``.
    """
    # Step 1: search for the best SMOTER parameters via differential evolution.
    de = DE(fitness=self.fitness, D=3, DRange=self.Drange, F=self.F,
            CR=self.CR, PopulationSize=self.PopulationSize, Lives=self.Lives)
    bestPars = de.evolution(otherPars=[self.data])
    # Step 2: resample the data set with the best parameters found.
    S = SMOTER(k=bestPars[0, 0], m=bestPars[0, 1], r=bestPars[0, 2],
               data=self.data)
    return (S.smoteR(), bestPars)
def visualize(DE, iterations, title='Differential Evolution'):
    """Run ``DE`` for ``iterations`` and plot every vector's fitness history.

    Parameters
    ----------
    DE : object
        Optimizer exposing ``run(iterations) -> (results, time)``; ``results``
        is assumed to be a 2-D array indexed [iteration, vector] — TODO
        confirm against the DE implementation.
    iterations : int
        Iteration budget forwarded to ``DE.run``.
    title : str
        Kept for interface compatibility; not used by this implementation.
    """
    results, elapsed = DE.run(iterations)  # renamed: don't shadow the time module
    fig, ax = plt.subplots(figsize=(8, 8))
    # Generalized: plot however many vectors the run produced, instead of
    # a hard-coded 10.
    for idx in range(results.shape[1]):
        plt.plot(results[:, idx], label='Vector' + str(idx))
    ax.ticklabel_format(style='plain')
    plt.ylabel("Profit/Fitness")
    plt.xlabel("Iteration")
    print('Time needed:', elapsed)
    plt.legend()
    plt.show()
def LDADE(config):
    """Tune LDA with DE using the settings in ``config``.

    Returns
    -------
    tuple
        ``(best.ind, best.fit)`` — the best individual's parameters and fitness.
    """
    rs = config["random_state"]
    seed(rs)
    np.random.seed(rs)
    de = DE(F=config["F"], CR=config["CR"], GEN=config["GEN"],
            Goal=config["Goal"], termination=config["termination"],
            random_state=rs)
    best, _ = de.solve(config["fitness"], config["learners_para"],
                       config["learners_para_bounds"],
                       config["learners_para_categories"],
                       term=config["term"],
                       data_samples=config["data_samples"],
                       random_state=rs, max_iter=config["max_iter"])
    return best.ind, best.fit
def transform(self, x, y):
    """Tune SMOTEND with DE for a specific data set, model and metric.

    Parameters
    ----------
    x, y :
        Data used to evaluate candidate parameter vectors.

    Uses the instance's DE settings: ``DRange``, ``F``, ``CR``,
    ``PopulationSize``, ``Lives``, plus ``metric`` and ``modelName``.

    Returns
    -------
    The parameter vectors of the final DE generation.
    """
    # Step 1: evolve the 3 SMOTEND parameters against (x, y) with the
    # configured model and evaluation metric.
    de = DE(fitness=SMOTENDDE_Fitness, D=3, DRange=self.DRange, F=self.F,
            CR=self.CR, PopulationSize=self.PopulationSize, Lives=self.Lives)
    paras, _ = de.evolution(otherPars=(x, y, self.modelName, self.metric))
    # Step 2: hand back the final generation's parameters.
    return paras
def de_fft(res=''):
    """Tune FFT with DE for every measure on project ``res`` and pickle results."""
    seed(1)
    np.random.seed(1)
    paths = [os.path.join(data_path, file_name) for file_name in data[res]]
    train_df = pd.concat([pd.read_csv(path) for path in paths[:-1]],
                         ignore_index=True)
    test_df = pd.read_csv(paths[-1])
    # Drop the first 3 (metadata) columns and binarize the bug counts.
    train_df, test_df = train_df.iloc[:, 3:], test_df.iloc[:, 3:]
    train_df['bug'] = train_df['bug'].apply(lambda x: 0 if x == 0 else 1)
    test_df['bug'] = test_df['bug'].apply(lambda x: 0 if x == 0 else 1)
    final_dic = {}
    temp = {}
    for x in measures:
        l = []
        l1 = []
        start_time = time.time()
        for _ in xrange(repeats):
            # Reshuffle both frames each repeat.
            train_df = train_df.sample(frac=1).reset_index(drop=True)
            test_df = test_df.sample(frac=1).reset_index(drop=True)
            print(x)
            # d2h is minimized; every other measure is maximized.  The two
            # original branches were identical apart from this flag.
            goal = "Min" if x == "d2h" else "Max"
            de = DE(GEN=5, Goal=goal, termination="Late")
            v, pareto = de.solve(fft_process, OrderedDict(learners_para_dic),
                                 learners_para_bounds, learners_para_categories,
                                 FFT, x, train_df)
            val = fft_eval(FFT, train_df, test_df, x)
            l.append(val)
            l1.append(v.ind)
        total_time = time.time() - start_time
        temp[x] = [l, l1, total_time]
    final_dic[FFT.__name__] = temp
    print(final_dic)
    with open('dump/' + res + '_early.pickle', 'wb') as handle:
        pickle.dump(final_dic, handle)
def early(corpus, labels, ranges, class_flag, res):
    """Tune every learner with DE and pickle per-learner results.

    For classification (``class_flag`` truthy): 10 shuffles x 10 stratified
    folds of DE tuning per learner.  Otherwise: 10 x 10 random 80/20 splits
    of DE tuning (minimizing) per learner.

    Results per learner: ``[fitness_list, parameter_list, elapsed_seconds]``,
    pickled to ``../dump/<res>.pickle``.

    NOTE(review): the outer loop enumerates ``learners_class`` but the
    regression branch indexes ``learners_reg[num]`` — this assumes both
    lists are index-aligned and equally long; confirm.
    """
    temp = {}
    for num, i in enumerate(learners_class):
        start_time = time.time()
        l = []
        paras = []
        if class_flag:
            for _ in range(10):
                shuffle(ranges)
                # Reorder the corpus/labels by the shuffled index list.
                corpus, labels = corpus[ranges], labels[ranges]
                skf = StratifiedKFold(n_splits=10)
                for train_index, test_index in skf.split(corpus, labels):
                    train_data, train_labels = corpus[train_index], labels[
                        train_index]
                    test_data, test_labels = corpus[test_index], labels[
                        test_index]
                    de = DE(GEN=5)
                    v, pareto = de.solve(learners_class[num],
                                         OrderedDict(learners_para_dic[num]),
                                         learners_para_bounds[num],
                                         learners_para_categories[num],
                                         train_data, train_labels, test_data,
                                         test_labels)
                    l.append(v.fit)
                    paras.append(v.ind)
            temp[learners_class[num].__name__] = [
                l, paras, time.time() - start_time
            ]
        else:
            # NOTE(review): this branch performs 10*10 = 100 tunings versus
            # 10 shuffles x 10 folds above — presumably intentional to match
            # the sample count, but worth confirming.
            for _ in range(10):
                for k in range(10):
                    shuffle(ranges)
                    # Random 80/20 train/test split from the shuffled indexes.
                    train_data, train_labels, test_data, test_labels = corpus[ranges[:int(0.8 * len(ranges))]] \
                        , labels[ranges[:int(0.8 * len(ranges))]] \
                        , corpus[ranges[int(0.8 * len(ranges)):]], labels[ranges[int(0.8 * len(ranges)):]]
                    # Regression fitness is minimized.
                    de = DE(Goal="Min", GEN=5)
                    v, pareto = de.solve(learners_reg[num],
                                         OrderedDict(learners_para_dic[num]),
                                         learners_para_bounds[num],
                                         learners_para_categories[num],
                                         train_data, train_labels, test_data,
                                         test_labels)
                    l.append(v.fit)
                    paras.append(v.ind)
            temp[learners_reg[num].__name__] = [
                l, paras, time.time() - start_time
            ]
    with open('../dump/' + res + '.pickle', 'wb') as handle:
        pickle.dump(temp, handle)
import numpy as np
import sys
import toml
import pickle
from util import get_funct
from DE import DE

# Load the run configuration from the TOML file named on the command line.
argv = sys.argv[1:]
conf = toml.load(argv[0])

# Resolve the objective function and pull the DE hyper-parameters.
funct = get_funct(conf['funct'])
NP = conf['NP']
max_iter = conf['max_iter']
F = conf['F']
CR = conf['CR']

# Bounds arrive as rows of [lower, upper]; split into the two vectors.
bounds = np.array(conf['bounds'])
lb, ub = bounds[:, 0], bounds[:, 1]

solver = DE(funct, NP, max_iter, lb, ub, F, CR)
solver.optimize()

# Persist the collected run data for later analysis.
with open('dataset.pickle', 'wb') as f:
    pickle.dump(solver.dataset, f)
from DE import DE

# Run DE on each model at several elite-sampling rates, with a fresh
# optimizer per run.  The repeated stanzas of the original are folded
# into one loop over (rate, label) pairs.
for model in ["Golinski", "Schaffer", "Kursawe", "Osyczka"]:
    for rate, label in [(0.33, "33%"), (0.50, "50%"), (0.70, "70%"), (1, "100%")]:
        # BUG FIX: the original message lacked a space after the model name
        # ("...Golinskiwith 33%..."). Parenthesized single-argument print
        # is valid in both Python 2 and 3.
        print("Running DE for " + model + " with " + label + " elite sampling")
        de = DE(model)
        de.run(s=rate)
result_index = func_no * times + time # 储存运行结果 data['GA'].iloc[result_index] = ga.final_result # 转存到CSV文件中 data.to_csv('results.csv') if opt == 'DE': # DE for func_no in range(benchmark): # 输出当前的benchmark函数 print('----当前benchmark函数为:', func_no + 1) # 设置各项参数 de = DE(size=100, dim=10, pos_max=100, pos_min=-100, max_iter=1000, func_no=func_no + 1, F=1, CR=0.5) # 多次运行 for time in range(times): # 初始化 de.initial() # 开始迭代 de.optimal() # 收敛结果 print('--------第', time + 1, '次收敛结果为:', de.final_result) # 运行结果的索引 result_index = func_no * times + time # 储存运行结果 data['DE'].iloc[result_index] = de.final_result
def _test(res=''):
    """Tune SMOTE with DE per learner/measure on project ``res``; pickle results."""
    seed(1)
    np.random.seed(1)
    paths = [os.path.join(data_path, file_name) for file_name in data[res]]
    train_df = pd.concat([pd.read_csv(path) for path in paths[:-1]],
                         ignore_index=True)
    test_df = pd.read_csv(paths[-1])
    # Drop the first 3 (metadata) columns and binarize the bug counts.
    train_df, test_df = train_df.iloc[:, 3:], test_df.iloc[:, 3:]
    train_df['bug'] = train_df['bug'].apply(lambda x: 0 if x == 0 else 1)
    test_df['bug'] = test_df['bug'].apply(lambda x: 0 if x == 0 else 1)
    final_dic = {}
    for i in learners:
        temp = {}
        for x in measures:
            l = []
            l1 = []
            start_time = time.time()
            for _ in xrange(repeats):
                # Reshuffle both frames each repeat.
                train_df = train_df.sample(frac=1).reset_index(drop=True)
                test_df = test_df.sample(frac=1).reset_index(drop=True)
                # Only these two measures are tuned; d2h is minimized and
                # popt20 maximized.  The two original branches were
                # identical apart from the Goal flag.
                if x in ("d2h", "popt20"):
                    goal = "Min" if x == "d2h" else "Max"
                    de = DE(GEN=5, Goal=goal, termination="Late")
                    v, pareto = de.solve(main, OrderedDict(learners_para_dic[0]),
                                         learners_para_bounds[0],
                                         learners_para_categories[0], i, x,
                                         train_df)
                    paras = v.ind
                    # Rebalance the training data with the tuned SMOTE.
                    data_train = smote.execute(paras.values(),
                                               samples=train_df.iloc[:, :-1],
                                               labels=train_df.iloc[:, -1:])
                    lab = [y for a in data_train.iloc[:, -1:].values.tolist()
                           for y in a]
                    labels = i(data_train.iloc[:, :-1].values, lab,
                               test_df.iloc[:, :-1].values)
                    lab = [y for a in test_df.iloc[:, -1:].values.tolist()
                           for y in a]
                    val = evaluation(x, labels, lab, test_df)
                    l.append(val)
                    l1.append(v.ind)
            total_time = time.time() - start_time
            temp[x] = [l, l1, total_time]
        final_dic[i.__name__] = temp
    print(final_dic)
    with open('../dump/' + res + '_late.pickle', 'wb') as handle:
        pickle.dump(final_dic, handle)
import numpy as np
from DE import DifferentialEvolution as DE
'''
Simplest possible use
'''


# objective to minimize: the squared Euclidean norm, a map R^n -> R
def f(x):
    return np.dot(x, x)


# dimension of the search space
n = 2

# build the optimizer with a fixed iteration budget and run it
optimizer = DE(f, n, iterations=1000)
optimizer.run()

# persist the results to disk...
optimizer.write_results('results.json')
# ...and also echo them to the screen
print(optimizer.get_results())
import numpy as np
from DE import DE

if __name__ == "__main__":
    # Search box: 25 dimensions, each bounded to [-600, 600].
    bound = np.tile([[-600], [600]], 25)
    # NOTE(review): `DE` is imported with `from DE import DE` but then used
    # as a namespace (`DE.DifferentialEvolutionAlgorithm`); this only works
    # if the imported `DE` object itself exposes that attribute.  Confirm,
    # or the import likely should be `import DE`.
    # Positional args appear to be: population size 60, dimension 25, the
    # bounds, 1000 iterations, and [F, CR] control parameters — TODO confirm
    # against DifferentialEvolutionAlgorithm's signature.
    dea = DE.DifferentialEvolutionAlgorithm(60, 25, bound, 1000, [0.8, 0.6])
    dea.solve()
PARAMETERS TO TEST ''' functions = [f1, f2, f3, f4, f5] intervals = [(-1, 1), (-5.12, 5.12), (-5.12, 5.12), (-5, 5), (-600, 600)] F = 0.75 iterations = 50005 trials = 100 for n in [5, 50]: for cr in [False, .5]: for base_change in [False, 100]: for i in range(1): filename = 'results/f{}-n={}-cr={}-base_change={}.json'.format( i + 1, n, cr, base_change) print('running {}'.format(filename)) optimizer = DE(functions[i], n, N=2 * n, crossover_p=cr, scaling_factor=F, populate_method='cube', populate_data=intervals[i], iterations=iterations, base_change=base_change, get_history=True, seed=range(1, trials + 1), trials=trials) optimizer.run(processes=trials) optimizer.write_results(filename)