def test_pickle():
    """Check picklability"""
    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    score = est.score(boston.data[500:, :], boston.target[500:])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    score2 = est2.score(boston.data[500:, :], boston.target[500:])
    assert_equal(score, score2)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    X_new = est.transform(boston.data[500:, :])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    X_new2 = est2.transform(boston.data[500:, :])
    assert_array_almost_equal(X_new, X_new2)

    # Check the classifier
    est = SymbolicClassifier(generations=2, random_state=0)
    est.fit(cancer.data[:100, :], cancer.target[:100])
    score = est.score(cancer.data[500:, :], cancer.target[500:])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    score2 = est2.score(cancer.data[500:, :], cancer.target[500:])
    assert_equal(score, score2)
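# A hedged sketch of the module-level fixtures test_pickle() relies on
# (`boston`, `cancer`, `pickle`, and the assert helpers). The exact import
# paths are assumptions; note that load_boston was removed in
# scikit-learn 1.2, so this needs an older release or a substitute dataset.
import pickle

from numpy.testing import assert_array_almost_equal, assert_equal
from sklearn.datasets import load_boston, load_breast_cancer

from gplearn.genetic import (SymbolicClassifier, SymbolicRegressor,
                             SymbolicTransformer)

boston = load_boston()
cancer = load_breast_cancer()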
def train():
    est_gp = SymbolicRegressor(population_size=150,
                               generations=20,
                               stopping_criteria=0.001,
                               p_crossover=0.8,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.05,
                               max_samples=0.9,
                               verbose=1,
                               metric='mean absolute error',
                               parsimony_coefficient=0.01)
    est_gp.fit(X_train, y_train)
    print(est_gp._program)
    print(est_gp.score(X_train, y_train))
def train(x, y_truth, X_train, y_train, X_test, y_test, target_func,
          noise_rate, noise_level):
    """
    x: range over which the target function is sampled
    y_truth: true values of the target function
    X_train: training data
    y_train: training targets (with noise)
    X_test: test data
    y_test: test targets
    target_func: expression of the target function
    noise_rate: fraction of noisy points
    noise_level: magnitude of the noise

    Fits on all of the training data at once; the fit may therefore be
    degraded by the noisy points.
    """
    # Inspect the data used for training
    print('--- training data ---')
    print(np.c_[X_train, y_train])

    # Define the symbolic regressor
    est_gp = SymbolicRegressor(population_size=5000,
                               function_set=['add', 'sub', 'mul', 'div'],
                               # optionally: 'sin', 'cos', 'sqrt', 'log',
                               # 'abs', 'neg', 'inv', 'tan'
                               generations=10, stopping_criteria=0.01,
                               p_crossover=0.7, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               max_samples=0.9, verbose=1,
                               metric='mean absolute error',
                               parsimony_coefficient=0.01,
                               random_state=0, const_range=(-1, 1))
    # Fit on the training set
    est_gp.fit(X_train.reshape(-1, 1), y_train)
    # Predictions on the test data
    y_pred = est_gp.predict(X_test.reshape(-1, 1))
    # R^2 on the test data
    score_gp = est_gp.score(X_test.reshape(-1, 1), y_test)
    # Mean squared error on the test data
    test_mse = mean_squared_error(y_test, y_pred)
    print('fitted program:', str(est_gp._program))
    print('R^2 : %.6f' % score_gp)
    print('MSE : %.6f' % test_mse)

    # Plot the target curve
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)
    plt.plot(x, y_truth, label=target_func)
    plt.legend(loc='best', fontsize=18)
    # Plot the training data
    plt.scatter(X_train, y_train, label='NoisyData', alpha=0.9)
    plt.legend(loc='best', fontsize=18)
    # Plot the fitted curve (sort the points by x first)
    data = np.c_[X_test, y_pred]
    data = data[np.lexsort(data[:, ::-1].T)]
    plt.plot(data[:, 0], data[:, 1], label='GP : ' + str(est_gp._program))
    # Title
    fmt = '$R^2 =\/ {0:.6f}$ , $MSE =\/ {1:.6f}$'.format(score_gp, test_mse)
    plt.title(fmt, fontproperties='SimHei', fontsize=20)
    plt.legend(loc='best', fontsize=18)
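# Hedged usage sketch for train(): build a noisy dataset for a sample
# target function. The target function, ranges, and noise scheme here are
# illustrative assumptions, not taken from the original code.
import numpy as np

def make_noisy_data(noise_rate=0.2, noise_level=0.5, n_train=30,
                    n_test=100, seed=0):
    rng = np.random.RandomState(seed)
    x = np.linspace(-1, 1, 200)
    f = lambda v: v ** 2 + v          # illustrative target: x^2 + x
    y_truth = f(x)
    X_train = rng.uniform(-1, 1, n_train)
    y_train = f(X_train)
    # corrupt a noise_rate fraction of the training targets
    idx = rng.choice(n_train, int(n_train * noise_rate), replace=False)
    y_train[idx] += noise_level * rng.randn(idx.size)
    X_test = rng.uniform(-1, 1, n_test)
    y_test = f(X_test)
    return x, y_truth, X_train, y_train, X_test, y_test

x, y_truth, X_train, y_train, X_test, y_test = make_noisy_data()
train(x, y_truth, X_train, y_train, X_test, y_test, '$x^2 + x$', 0.2, 0.5)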
def main():
    x = np.genfromtxt('x_train.csv', delimiter=',').reshape((1000, 1))
    y = np.genfromtxt('y_train.csv', delimiter=',')

    est_gp = SymbolicRegressor(population_size=50,
                               generations=20, stopping_criteria=0.01,
                               p_crossover=0.7, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               max_samples=0.9, verbose=1,
                               parsimony_coefficient=0.01, random_state=0)
    est_gp.fit(x, y)
    print(est_gp._program)

    est_tree = DecisionTreeRegressor()
    est_tree.fit(x, y)
    est_rf = RandomForestRegressor()
    est_rf.fit(x, y)

    x0 = np.arange(-1, 1, 1 / 10.)
    x1 = np.arange(-1, 1, 1 / 10.)
    x0, x1 = np.meshgrid(x0, x1)
    y_truth = 3 * x0**2 + 5 * x0 + 1  # exact function we are estimating

    y_gp = est_gp.predict(np.c_[x0.ravel()]).reshape(x0.shape)
    score_gp = est_gp.score(x, y)
    y_tree = est_tree.predict(np.c_[x0.ravel()]).reshape(x0.shape)
    score_tree = est_tree.score(x, y)
    y_rf = est_rf.predict(np.c_[x0.ravel()]).reshape(x0.shape)
    score_rf = est_rf.score(x, y)

    for i, (ys, score, title) in enumerate([
            (y_truth, None, "Ground Truth"),
            (y_gp, score_gp, "SymbolicRegressor"),
            (y_tree, score_tree, "DecisionTreeRegressor"),
            (y_rf, score_rf, "RandomForestRegressor")]):
        plt.subplot(2, 2, i + 1)
        plt.plot(x0, ys, 'C0o')
        plt.title(title)  # label each panel (title was unpacked but unused)
        plt.grid(True, which='both')
        plt.axhline(y=0, color='k')
        plt.axvline(x=0, color='k')
    plt.show()
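# Hedged sketch of generating the x_train.csv / y_train.csv inputs main()
# expects, using the ground-truth function from the plotting loop above
# (the sampling range and distribution are assumptions):
import numpy as np

rng = np.random.RandomState(0)
x = rng.uniform(-1, 1, 1000)
y = 3 * x ** 2 + 5 * x + 1
np.savetxt('x_train.csv', x, delimiter=',')
np.savetxt('y_train.csv', y, delimiter=',')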
def test_symbolic_regression(plotOnly=True):
    nsample = 4000
    sig = 0.2

    x = np.linspace(-50, 50, nsample)
    X = np.column_stack((x / 5, 10 * np.sin(x), (x - 5) ** 3,
                         np.ones(nsample)))
    beta = [0.01, 1, 0.001, 5.]
    y_true = np.dot(X, beta)
    y = y_true + sig * np.random.normal(size=nsample)

    df = pd.DataFrame()
    df["x"] = x
    df["y"] = y

    fig, ax = plt.subplots()
    ax.plot(df.x, df.y, c="k", ls="--", label="Truth")
    ax.set_title("Ground truth")

    X = df[["x"]]
    y = df.y

    # Split into train/test data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=constant)

    # Converter to print the fitted program as a sympy expression
    converter = {
        'sub': lambda x, y: x - y,
        'div': lambda x, y: x / y,
        'mul': lambda x, y: x * y,
        'add': lambda x, y: x + y,
        'neg': lambda x: -x,
        'pow': lambda x, y: x ** y,
        'sin': lambda x: sin(x),
        'cos': lambda x: cos(x),
        'inv': lambda x: 1 / x,
        'sqrt': lambda x: x ** 0.5,
        'pow3': lambda x: x ** 3
    }

    if plotOnly == False:
        # Train the regressor
        function_set = ["add", "sub", "mul", "div", "cos", "sin", "neg",
                        "inv"]
        # Instantiate the symbolic regressor
        SR = SymbolicRegressor(population_size=5000,
                               function_set=function_set,
                               generations=5,
                               stopping_criteria=0.01,
                               p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1,
                               max_samples=0.9,
                               verbose=1,
                               parsimony_coefficient=0.001,
                               random_state=0,
                               feature_names=X_train.columns)
        # Fit against the training data
        SR.fit(X_train, y_train)
        dump(SR, "./SR_Test.bin")

    try:
        SR = load("./SR_Test.bin")
    except:
        raise IOError("./SR_Test.bin does not exist, train a model first")

    print("R$^2$: %s" % (SR.score(X_test, y_test)))

    # Write out the fitted expression
    func = sympify(str(SR._program), locals=converter)
    print(func)
    with open("./test_print_formula.txt", "w") as f:
        f.write(str(func))

    # Predict using the trained regressor
    y_pred = SR.predict(df.x.to_numpy().reshape(-1, 1))

    # Overlay the prediction on the plot
    ax.scatter(df.x, y_pred, label="SR")
    ax.legend()
    plt.show()
dump(SR,"./TrainedRegressor.pkl") else: SR = load("./TrainedRegressor.pkl") #Print-out the regression formula formula = sympify( (SR._program), locals=converter ) #Print formula, R2 and save it as a txt print("Formula: ", formula) print('R$^2$:',SR.score( X_test, #scaled X_test mmy.transform( y_test #scaled y_test ) ) ) with open("./formula.txt","w") as f: f.write(str(formula)) #Predict the test data y_pred = SR.predict(X_test).reshape(-1,1) #Scale back y_pred y_pred = mmy.inverse_transform(y_pred) #Dump y_pred vs y_test df = pd.DataFrame()
def GP(self):
    data_training = pd.DataFrame(self.GP_training)
    # data_training.columns = data_training.columns
    target_training = pd.DataFrame(self.training['result'])
    target_training.columns = ['result']

    # symbolic regression
    function_set = ('add', 'sub', 'mul', 'div', 'sqrt', 'max', 'min')
    sr = SymbolicRegressor(population_size=50000,
                           generations=10, stopping_criteria=0.01,
                           function_set=function_set,
                           p_crossover=0.1, p_subtree_mutation=0.1,
                           p_hoist_mutation=0.15, p_point_mutation=0.2,
                           max_samples=0.7, verbose=1,
                           parsimony_coefficient=0.025, random_state=0)
    sr.fit(data_training, target_training)

    # Check the results.
    # score() returns the coefficient of determination R^2 of the prediction.
    self.logger.write(str(sr.score(data_training, target_training)))
    self.logger.write("\n")

    data_test = pd.DataFrame(self.GP_testing)
    target_test = pd.DataFrame(self.test['result'])
    predict_test = sr.predict(data_test)
    # Map the continuous predictions onto the class labels {-1, 0, 1}
    pre = list()
    for i in predict_test:
        if i < 0:
            pre.append(-1)
        elif i > 0:
            pre.append(1)
        else:
            pre.append(0)
    predict_test = np.asarray(pre)
    target_test.columns = ['result']

    self.logger.write(" 1.1- f1 score for GP: ")
    self.logger.write("\n")
    self.logger.write(str(f1_score(target_test, predict_test,
                                   average='macro')))
    self.logger.write("\n")
    self.logger.write(" 1.2- f1 score for GP None: ")
    self.logger.write("\n")
    self.logger.write(str(f1_score(target_test, predict_test,
                                   average=None)))
    self.logger.write("\n")

    # Calculate the accuracy
    self.logger.write("2 - accuracy score for GP: ")
    self.logger.write("\n")
    self.logger.write(str(accuracy_score(target_test, predict_test,
                                         normalize=True)))
    self.logger.write("\n")

    # K-fold cross-validation approach
    kf = KFold(n_splits=10, shuffle=False)

    # Collect the accuracy of each fold's model in this list
    accuracy_model = []
    index = 1

    # Iterate over each train/test split
    for train_index, test_index in kf.split(data_test):
        # Split train/test
        X_train, X_test = (data_test.iloc[train_index],
                           data_test.iloc[test_index])
        y_train, y_test = (target_test.iloc[train_index],
                           target_test.iloc[test_index])
        # Train the model
        sr.fit(X_train, y_train)
        # Append the accuracy of this fold's model to accuracy_model
        predict_test = sr.predict(X_test)
        pre = list()
        for i in predict_test:
            if i < 0:
                pre.append(-1)
            elif i > 0:
                pre.append(1)
            else:
                pre.append(0)
        predict_test = np.asarray(pre)
        acc = accuracy_score(y_test, predict_test, normalize=True) * 100
        # write() takes a single string, so format it first (the original
        # passed logging-style arguments, which write() does not accept)
        self.logger.write("K-fold number %d, accuracy_score %d"
                          % (index, acc))
        self.logger.write("\n")
        accuracy_model.append(acc)
        index += 1

    # Report the per-fold accuracies
    self.logger.write("3 - K-Fold Cross Validation for GP: ")
    self.logger.write("\n")
    self.logger.write(str(accuracy_model))
    self.logger.write("\n")
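# The two label-mapping loops above convert continuous predictions into the
# class labels {-1, 0, 1}. A hedged, vectorized equivalent using numpy
# (np.sign returns 0 for exactly-zero inputs, matching the else branch):
import numpy as np

def to_class_labels(predictions):
    return np.sign(predictions).astype(int)

# e.g. predict_test = to_class_labels(sr.predict(data_test))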
                           verbose=1, parsimony_coefficient=0.01,
                           random_state=0)
est_gp.fit(X_train, y_train)
print(est_gp._program)

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

est_tree = DecisionTreeRegressor()
est_tree.fit(X_train, y_train)
est_rf = RandomForestRegressor()
est_rf.fit(X_train, y_train)

y_gp = est_gp.predict(np.c_[x_0.ravel(), x_1.ravel()]).reshape(x_0.shape)
score_gp = est_gp.score(X_test, y_test)
y_tree = est_tree.predict(np.c_[x_0.ravel(),
                                x_1.ravel()]).reshape(x_0.shape)
score_tree = est_tree.score(X_test, y_test)
y_rf = est_rf.predict(np.c_[x_0.ravel(), x_1.ravel()]).reshape(x_0.shape)
score_rf = est_rf.score(X_test, y_test)

fig = plt.figure(figsize=(8, 6))
for i, (y, score, title) in enumerate([
        (y_truth, None, "Ground Truth"),
        (y_gp, score_gp, "SymbolicRegressor"),
        (y_tree, score_tree, "DecisionTreeRegressor"),
        (y_rf, score_rf, "RandomForestRegressor")]):
    ax = fig.add_subplot(2, 2, i + 1, projection='3d')
    ax.set_xlim(-1, 1)
                           comparison=True, transformer=True,
                           p_crossover=0.7, p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05, p_point_mutation=0.1,
                           max_samples=0.9, verbose=1,
                           const_range=(-20.0, 20.0),
                           parsimony_coefficient=0.01, random_state=1,
                           metric='mse')
est_gp.fit(data_train, data1_train)
print(est_gp._program)
score_gp = est_gp.score(data_test, data1_test)
print(score_gp)
p = est_gp.predict(features)

xc = np.arange(0, len(labels1), 1)
xa = np.arange(0, x, 1)
xb = np.arange(x - 1, len(labels1), 1)
# print(xb.size)
if xb.size != len(p[x - 1:]):
    xb = np.arange(x - 1, len(labels1) - 1, 1)
# print(xa.size, xb.size, xc.size)
print(len(p[0:x]), len(p[x - 1:]), len(labels1))
print(p, labels1)
fig = plt.figure()
# function_set = [logic_and, logic_not, logic_or]
function_set = [logic_and, logic_or, logic_xor, logic_not]

est_gp = SymbolicRegressor(population_size=100,
                           generations=500,
                           # stopping_criteria=0.01,
                           tournament_size=2,
                           function_set=function_set,
                           parsimony_coefficient=0.009,
                           max_samples=1.0, verbose=1,
                           p_crossover=0.9, p_subtree_mutation=0.1,
                           p_hoist_mutation=0.0, p_point_mutation=0.0,
                           n_jobs=-1)
est_gp.fit(X, Y)
print(est_gp._program)
print("-------------------------------")
# print(est_gp._programs)
score_gp = est_gp.score(X, Y)
print(score_gp)

graph = pydotplus.graphviz.graph_from_dot_data(
    est_gp._program.export_graphviz())
graph.write_svg('test.svg')
# res = Image(graph.create_png())
# display(res)
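# The function_set above references logic_and/logic_or/logic_xor/logic_not,
# which are not defined in this snippet. A hedged sketch of how they could
# be built with gplearn's make_function, treating values above 0.5 as
# logical True (the 0.5 cutoff is an assumption):
import numpy as np
from gplearn.functions import make_function

def _and(x1, x2):
    return ((x1 > 0.5) & (x2 > 0.5)).astype(float)

def _or(x1, x2):
    return ((x1 > 0.5) | (x2 > 0.5)).astype(float)

def _xor(x1, x2):
    return ((x1 > 0.5) ^ (x2 > 0.5)).astype(float)

def _not(x1):
    return (x1 <= 0.5).astype(float)

logic_and = make_function(function=_and, name='logic_and', arity=2)
logic_or = make_function(function=_or, name='logic_or', arity=2)
logic_xor = make_function(function=_xor, name='logic_xor', arity=2)
logic_not = make_function(function=_not, name='logic_not', arity=1)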
def ransac(x, y_truth, X_train, y_train, X_test, y_test, target_func,
           noise_rate, noise_level):
    """
    x: range over which the target function is sampled
    y_truth: true values of the target function
    X_train: training data
    y_train: training targets (with noise)
    X_test: test data
    target_func: expression of the target function
    y_test: test targets
    noise_rate: fraction of noisy points
    noise_level: magnitude of the noise

    Trains on a subset of the data and reselects the training points from
    the fit at each round, which improves robustness to noise.
    """
    # Maximum number of iterations
    max_iter = 5
    # Size of the dataset
    length = X_train.shape[0]
    # Noisy targets
    y_noise = y_train
    # Number of noisy points
    noise_count = int(length * noise_rate)
    # Number of training points: if the noise rate lambda is at most 0.5,
    # pick (1 - lambda) * length points, otherwise pick 0.5 * length points
    if noise_rate <= 0.5:
        pure_count = length - noise_count
    else:
        pure_count = length // 2
    # Iteration counter
    count = 0
    # R^2 scores on the test set
    test_score = []
    # Mean squared errors on the test set
    test_mse = []
    # Fitted expressions
    result = []
    # Training set
    train_data = np.c_[X_train, y_noise]
    print('------ all training data ------')
    print(train_data)
    print('-------------------------------')

    # Randomly sample the initial training set: shuffle an index list and
    # take the first pure_count entries
    lst = list(range(length))
    np.random.shuffle(lst)
    # Initial training data: as many points as there are noise-free ones
    random_train_data = train_data[lst[:pure_count]]
    # Keep the training set used in each round
    data_list = [0] * max_iter

    while count < max_iter:
        # Save the current training set
        data_list[count] = random_train_data
        print('------------------ training round ' + str(count)
              + ' ------------------')
        print('---- data used in this round ----')
        print(random_train_data)
        print('---------------------------------')

        # Symbolic regressor
        est_gp = SymbolicRegressor(population_size=5000,
                                   function_set=['add', 'sub', 'mul',
                                                 'div'],
                                   # optionally: 'sin', 'cos', 'sqrt',
                                   # 'log', 'abs', 'neg', 'inv', 'tan'
                                   generations=10, stopping_criteria=0.01,
                                   p_crossover=0.7, p_subtree_mutation=0.1,
                                   p_hoist_mutation=0.05,
                                   p_point_mutation=0.1,
                                   max_samples=0.9, verbose=1,
                                   metric='mean absolute error',
                                   parsimony_coefficient=0.01,
                                   random_state=0, const_range=(-1, 1))
        # Fit on the current training data
        est_gp.fit(random_train_data[:, 0].reshape(-1, 1),
                   random_train_data[:, 1])
        # Fitted expression
        print('fitted program : ', est_gp._program)
        result.append(str(est_gp._program))

        # Predictions on all training points
        y_pred = est_gp.predict(X_train.reshape(-1, 1))
        # Predictions on the test set
        ytest_pred = est_gp.predict(X_test.reshape(-1, 1))
        # R^2 on the test set
        score = est_gp.score(X_test.reshape(-1, 1), y_test)
        test_score.append(score)
        # Mean squared error on the test set
        mse = mean_squared_error(ytest_pred, y_test)
        test_mse.append(mse)

        # Residuals between the noisy targets and the predictions
        diff = abs(y_pred - y_train)
        # Keep the pure_count points with the smallest residuals as the
        # training data for the next round
        flag = np.where(diff < sorted(diff)[pure_count], 1, 0)
        temp_data = train_data[flag == 1]
        # Stop if the new training set equals the previous one
        if (temp_data == random_train_data).all():
            break
        else:
            # New training data
            random_train_data = train_data[flag == 1]
        count += 1
        print('MSE : {0} , R^2 : {1} '.format(test_mse[-1],
                                              test_score[-1]))

    ytest_pred = est_gp.predict(X_test.reshape(-1, 1))
    ytest_score = est_gp.score(X_test.reshape(-1, 1), y_test)
    ytest_mse = mean_squared_error(ytest_pred, y_test)

    # Plot the target function
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)
    plt.plot(x, y_truth, label=target_func)
    plt.legend(loc='best', fontsize=18)
    # Plot the training data
    plt.scatter(X_train, y_noise, label='NoisyData', alpha=0.9)
    plt.legend(loc='best', fontsize=18)
    # Plot the fitted curve
    data = np.c_[X_test, ytest_pred]
    data = data[np.lexsort(data[:, ::-1].T)]
    plt.plot(data[:, 0], data[:, 1],
             label='RCGP : ' + str(est_gp._program))
    fmt = '$R^2 =\/ {0:.6f}$ , $MSE =\/ {1:.6f}$'.format(ytest_score,
                                                         ytest_mse)
    plt.title(fmt, fontproperties='SimHei', fontsize=20)
    plt.legend(loc='best', fontsize=16)

    print(result)
    print('mse: ', test_mse)
    print('R^2: ', test_score)
    print()
    print("R^2 : %.6f" % test_score[-1])
    print("MSE : %.6f" % test_mse[-1])
    # print(est_gp.score(X_test.reshape(-1, 1), y_test))
    return data_list
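# Hedged usage sketch for ransac(), reusing the illustrative
# make_noisy_data helper sketched after train() above (that helper is an
# assumption for demonstration, not part of the original code):
x, y_truth, X_train, y_train, X_test, y_test = make_noisy_data(
    noise_rate=0.3, noise_level=1.0)
data_list = ransac(x, y_truth, X_train, y_train, X_test, y_test,
                   '$x^2 + x$', 0.3, 1.0)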
import warnings

import numpy as np
from gplearn.functions import make_function
from gplearn.genetic import SymbolicRegressor
from sklearn.model_selection import train_test_split

warnings.filterwarnings("ignore")


def is_less_than_zero(x):
    result = (x < 0)
    return result.astype(int)


def is_greater_than_or_equal_to_zero(x):
    result = (x >= 0)
    return result.astype(int)


is_lt_zero = make_function(is_less_than_zero, "is_lt_zero", arity=1)
is_gte_zero = make_function(is_greater_than_or_equal_to_zero,
                            "is_gte_zero", arity=1)
function_set = [is_lt_zero, is_gte_zero, "mul", "add", "neg"]

X = np.arange(-10, 11).reshape(-1, 1)
y = np.abs(X).reshape(-1, 1)
X_train, X_test, y_train, y_test = train_test_split(X.tolist(), y.tolist())

my_abs_gp = SymbolicRegressor(function_set=function_set,
                              init_method="grow",
                              parsimony_coefficient=0.0625,
                              verbose=True)
my_abs_gp.fit(X_train, y_train)
print(my_abs_gp.score(X_test, y_test))
print(my_abs_gp._program)
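# For reference, one exact program the run above can discover with this
# function set is add(mul(is_gte_zero(X0), X0), mul(is_lt_zero(X0),
# neg(X0))), i.e. |x| = x*[x >= 0] + (-x)*[x < 0]. A quick check of that
# identity using the raw Python functions defined above:
X0 = np.arange(-10, 11)
manual_abs = (is_greater_than_or_equal_to_zero(X0) * X0
              + is_less_than_zero(X0) * -X0)
assert (manual_abs == np.abs(X0)).all()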
        i += 1
        continue
    locations = line.split()
    if len(locations) == 2:
        x.append([float(locations[0])])
        y.append(float(locations[1]))
    else:
        continue

est_gp = SymbolicRegressor(population_size=5000, generations=15,
                           stopping_criteria=0.01, p_crossover=0.7,
                           p_subtree_mutation=0.1, p_hoist_mutation=0.08,
                           p_point_mutation=0.1, max_samples=0.9, verbose=1,
                           parsimony_coefficient=0.01, random_state=50)
est_gp.fit(x, y)

# score() returns R^2, not a percentage accuracy, so label it accordingly
print("R^2: " + str(est_gp.score(x, y)))
print("Function: " + str(est_gp))

# noinspection PyProtectedMember
# graph = pydotplus.graphviz.graph_from_dot_data(est_gp._program.export_graphviz())
# Image(graph.create_png())
# graph.write_png("dtree.png")
                     init_depth=(6, 13),
                     max_samples=0.4, verbose=1, n_jobs=-1,
                     metric='rmse', parsimony_coefficient=0.0005,
                     random_state=1234)

if (regressor >= 3):
    gp.fit(train_x, train_y)
    predict_y = gp.predict(test_x)
    predict_y[predict_y < 0] = 0  # only positive values

    print('\nDetails about the results using Genetic Programming\n')
    print(gp._program)
    # summary of the results
    print('R2(max) = ', gp.score(train_x, train_y))
    print('Raw fitness = ', gp._program.raw_fitness_)
    # print('Fitness = ', gp._program.fitness_)
    print('OOB fitness = ', gp._program.oob_fitness_)
    print('Depth = ', gp._program.depth_)
    print('Length = ', gp._program.length_, '\n')

'''
Comments:
raw_fitness_ : The raw fitness of the individual program.
fitness_ : The penalized fitness of the individual program.
oob_fitness_ : The out-of-bag raw fitness of the individual program for the
    held-out samples. Only present when sub-sampling was used in the
    estimator by specifying max_samples < 1.0.
depth_ : The maximum depth of the program tree.
# data = np.loadtxt('mydata2.txt')
# X_train = data[:, 0:8]
# Y_train = data[:, 8]
#
# X_test = X_train
# Y_test = Y_train

random_engine = check_random_state(0)
X_train = random_engine.uniform(-1, 1, 10000).reshape(-1, 1)
Y_train = np.sinh(X_train)
X_test = random_engine.uniform(-1, 1, 10000).reshape(-1, 1)
Y_test = np.sinh(X_test)

est_gp = SymbolicRegressor(population_size=5000,
                           generations=1000, stopping_criteria=0.01,
                           p_crossover=0.6, p_subtree_mutation=0.1,
                           p_hoist_mutation=0.1, p_point_mutation=0.1,
                           max_samples=0.9, verbose=1,
                           parsimony_coefficient=0.01, n_jobs=1,
                           function_set=('add', 'mul', 'max'))
est_gp.fit(X_train, Y_train)
print(est_gp)
with open('result.txt', 'w') as f:
    f.write(str(est_gp))  # write() needs a string, not the estimator object

score_train = est_gp.score(X_train, Y_train)
score_test = est_gp.score(X_test, Y_test)
print(score_train, score_test)
                           comparison=True, transformer=True,
                           p_crossover=0.7, p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05, p_point_mutation=0.1,
                           max_samples=0.9, verbose=1,
                           const_range=(-20.0, 20.0),
                           parsimony_coefficient=0.01, random_state=1)
est_gp.fit(da_train, tar_train)
print(est_gp._program)
score_gp = est_gp.score(da_test, tar_test)
print(score_gp)

p = est_gp.predict(da)
print(r2_score(tar, p))
print(explained_variance_score(tar, p))
print(mean_squared_error(tar, p))

# Decision tree
est_dt = DecisionTreeRegressor()
est_dt = est_dt.fit(da_train, tar_train)
p1 = est_dt.predict(da)
print(est_dt.score(da_test, tar_test))
print(r2_score(tar, p1))
print(explained_variance_score(tar, p1))
print(mean_squared_error(tar[x:], p1[x:]))
def test_symbolic_regressor():
    """Check that SymbolicRegressor example works"""
    rng = check_random_state(0)
    X_train = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_train = X_train[:, 0] ** 2 - X_train[:, 1] ** 2 + X_train[:, 1] - 1
    X_test = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_test = X_test[:, 0] ** 2 - X_test[:, 1] ** 2 + X_test[:, 1] - 1

    est_gp = SymbolicRegressor(population_size=5000, generations=20,
                               stopping_criteria=0.01, p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1, max_samples=0.9,
                               parsimony_coefficient=0.01, random_state=0)
    est_gp.fit(X_train, y_train)

    assert_equal(len(est_gp._programs), 7)
    expected = 'sub(add(-0.999, X1), mul(sub(X1, X0), add(X0, X1)))'
    assert_equal(est_gp.__str__(), expected)
    assert_almost_equal(est_gp.score(X_test, y_test), 0.99999, decimal=5)
    dot_data = est_gp._program.export_graphviz()
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="add", '
                'fillcolor="#136ed4"] '
                ';\n2 [label="-0.999", fillcolor="#60a6f6"] ;\n3 '
                '[label="X1", '
                'fillcolor="#60a6f6"] ;\n1 -> 3 ;\n1 -> 2 ;\n4 '
                '[label="mul", '
                'fillcolor="#136ed4"] ;\n5 [label="sub", '
                'fillcolor="#136ed4"] '
                ';\n6 [label="X1", fillcolor="#60a6f6"] ;\n7 [label="X0", '
                'fillcolor="#60a6f6"] ;\n5 -> 7 ;\n5 -> 6 ;\n8 '
                '[label="add", '
                'fillcolor="#136ed4"] ;\n9 [label="X0", '
                'fillcolor="#60a6f6"] '
                ';\n10 [label="X1", fillcolor="#60a6f6"] ;\n8 -> 10 '
                ';\n8 -> 9 '
                ';\n4 -> 8 ;\n4 -> 5 ;\n0 -> 4 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
    assert_equal(est_gp._program.parents, {'method': 'Crossover',
                                           'parent_idx': 1555,
                                           'parent_nodes': range(1, 4),
                                           'donor_idx': 78,
                                           'donor_nodes': []})
    idx = est_gp._program.parents['donor_idx']
    fade_nodes = est_gp._program.parents['donor_nodes']
    assert_equal(est_gp._programs[-2][idx].__str__(), 'add(-0.999, X1)')
    assert_almost_equal(est_gp._programs[-2][idx].fitness_, 0.351803319075)
    dot_data = est_gp._programs[-2][idx].export_graphviz(
        fade_nodes=fade_nodes)
    expected = ('digraph program {\nnode [style=filled]\n0 [label="add", '
                'fillcolor="#136ed4"] ;\n1 [label="-0.999", '
                'fillcolor="#60a6f6"] ;\n2 [label="X1", '
                'fillcolor="#60a6f6"] '
                ';\n0 -> 2 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
    idx = est_gp._program.parents['parent_idx']
    fade_nodes = est_gp._program.parents['parent_nodes']
    assert_equal(est_gp._programs[-2][idx].__str__(),
                 'sub(sub(X1, 0.939), mul(sub(X1, X0), add(X0, X1)))')
    assert_almost_equal(est_gp._programs[-2][idx].fitness_, 0.17080204042)
    dot_data = est_gp._programs[-2][idx].export_graphviz(
        fade_nodes=fade_nodes)
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="sub", '
                'fillcolor="#cecece"] '
                ';\n2 [label="X1", fillcolor="#cecece"] ;\n3 '
                '[label="0.939", '
                'fillcolor="#cecece"] ;\n1 -> 3 ;\n1 -> 2 ;\n4 '
                '[label="mul", '
                'fillcolor="#136ed4"] ;\n5 [label="sub", '
                'fillcolor="#136ed4"] '
                ';\n6 [label="X1", fillcolor="#60a6f6"] ;\n7 [label="X0", '
                'fillcolor="#60a6f6"] ;\n5 -> 7 ;\n5 -> 6 ;\n8 '
                '[label="add", '
                'fillcolor="#136ed4"] ;\n9 [label="X0", '
                'fillcolor="#60a6f6"] '
                ';\n10 [label="X1", fillcolor="#60a6f6"] ;\n8 -> 10 '
                ';\n8 -> 9 '
                ';\n4 -> 8 ;\n4 -> 5 ;\n0 -> 4 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
plt.plot(timeline, sample_data)
plt.show()

# Train/test separation
X_train, X_test, y_train, y_test = train_test_split(timeline, sample_data,
                                                    test_size=0.2)

# Apply the regressor
reg = SymbolicRegressor(population_size=2000,
                        generations=20, stopping_criteria=0.01,
                        p_crossover=0.7, p_subtree_mutation=0.1,
                        p_hoist_mutation=0.05, p_point_mutation=0.1,
                        max_samples=0.9, verbose=1,
                        parsimony_coefficient=0.01, random_state=0,
                        function_set=('add', 'sub', 'mul', 'div', 'sin',
                                      'cos', 'tan', 'abs', 'log'))
reg.fit(X_train.reshape(-1, 1), y_train)
score = reg.score(X_test.reshape(-1, 1), y_test)
print("Function Regressed:", reg._program, " | Score:", score)

# Create validation data with faults and labels
validation_timeline, validation_data, real_labels = gen_validation_data(
    parametric_function, timeline_end, validation_max_end, points_count,
    noise_std, validation_fault_count)

# Validate fault detection
reconstruction = reg.predict(validation_timeline.reshape(-1, 1))
res = minimize(calc_labels_fitness, np.array([initial_fault_limiar]),
               method='Nelder-Mead')
rec_labels = calc_labels(res.x[0], reconstruction, validation_data)
print(confusion_matrix(real_labels, rec_labels))
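# calc_labels and calc_labels_fitness are defined elsewhere in this script.
# A hedged sketch of what the labeling step presumably does: flag a sample
# as faulty (1) when it deviates from the regressor's reconstruction by
# more than the threshold, otherwise mark it normal (0).
import numpy as np

def calc_labels(threshold, reconstruction, data):
    return (np.abs(data - reconstruction) > threshold).astype(int)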
                     p_subtree_mutation=0.1, p_hoist_mutation=0.05,
                     p_point_mutation=0.1, max_samples=0.9, verbose=1,
                     parsimony_coefficient=0.01, random_state=0)
genp.fit(x_train, y_train)
print(genp._program)
pre = genp.predict(x_val)

# score() returns a value; print it rather than discarding it
print('accuracy on training set\n')
print(genp.score(x_train, y_train))
print('accuracy on validation set\n')
print(genp.score(x_val, y_val))

# %%
"""
# %
df2 = cp.deepcopy(df)
df2['Survived'] = df2['Survived'].astype('str').replace('0', '-1').astype('int64')
df2.corr()

df.corr().loc['Survived'].plot.bar()

df.plot.scatter(x='Pclass', y='Survived')
df.plot.scatter(x='Fare', y='Survived')
                           stopping_criteria=0.01, p_crossover=0.7,
                           p_subtree_mutation=0.1, p_hoist_mutation=0.05,
                           p_point_mutation=0.1, max_samples=0.9, verbose=1,
                           parsimony_coefficient=0.01, random_state=0,
                           function_set=('add', 'sub', 'mul', 'div', 'sqrt',
                                         'log', 'abs', 'neg', 'inv', 'max',
                                         'min', 'sin', 'cos', 'tan'))
est_gp.fit(trainSet, z_train)
print(est_gp._program)
score_gp = est_gp.score(testSet, z_test)
# score_gp = est_gp.mean_absolute_error(testSet, z_test)
print(score_gp)

# 19 generations required:
# min(add(add(log(add(inv(div(sin(X0), mul(X0, 0.952))),neg(abs(X0)))), neg(inv(div(sin(X1), mul(X0, 0.952))))), min(add(log(add(min(mul(-0.020, X1), cos(X1)), neg(add(log(add(min(inv(div(sin(X1), mul(X0, 0.952))), cos(X1)), div(sin(X1), add(log(add(min(mul(-0.020, X1), cos(X1)), neg(add(log(cos(X1)), neg(0.952))))), add(tan(tan(sin(X1))), neg(inv(div(sin(X1), neg(div(X1, 0.794)))))))))), neg(0.952))))), add(tan(tan(sin(X1))), neg(inv(div(sin(X1), neg(div(X1, 0.794))))))), div(neg(cos(tan(X1))), inv(mul(add(X1, X1), log(X1)))))), div(neg(cos(inv(mul(add(X1, X1), log(X1))))), inv(mul(add(X1, X1), log(X1)))))

z_gp = est_gp.predict(np.c_[x.ravel(), y.ravel()]).reshape(x.shape)
# print(z_gp)

ax = plt.figure().gca(projection='3d')
ax.set_xlim(-10, 10)
ax.set_ylim(-10, 10)
# surf = ax.plot_trisurf(x_test, y_test, z_gp, color='green')
surf = ax.plot_surface(x, y,
def gplearn_procedure(equation_id, no_samples=1000, input_range=(-1, 1),
                      save_path=None, save=True, load=True,
                      func_set=['add', 'sub', 'mul', 'div', 'log', 'sqrt',
                                'cos', 'tan', 'sin', 'pow', 'exp'],
                      verbose=1):
    """
    Uses gplearn to attempt to predict the equation form of 'equation_id'.
    Renders a graphviz image to images/gplearn/.
    Returns the predicted equation, R^2 score, and time taken.

    Parameters
    ----------
    equation_id : string
        The ID of an equation in the dataset. Must be a valid one.
    no_samples : int
        The number of samples you want fed in to the algorithm.
    input_range : tuple(float, float)
        The minimum and maximum values of all input parameters.
    save_path : string path
        The path to where you wish to save this dataframe.
    save : boolean
        Saves the file to save_path iff True.
    load : boolean
        If True, looks for the file in save_path and loads it preemptively
        if it is there.
    func_set : list
        List of strings, i.e. names of functions / operations to consider.
        Current options include:
            'add' : addition, arity=2.
            'sub' : subtraction, arity=2.
            'mul' : multiplication, arity=2.
            'div' : protected division where a denominator near-zero
                returns 1., arity=2.
            'sqrt' : protected square root where the absolute value of the
                argument is used, arity=1.
            'log' : protected log where the absolute value of the argument
                is used and a near-zero argument returns 0., arity=1.
            'abs' : absolute value, arity=1.
            'neg' : negative, arity=1.
            'inv' : protected inverse where a near-zero argument
                returns 0., arity=1.
            'max' : maximum, arity=2.
            'min' : minimum, arity=2.
            'sin' : sine (radians), arity=1.
            'cos' : cosine (radians), arity=1.
            'tan' : tangent (radians), arity=1.
            'exp' : exponential (self defined), arity=1.
            'pow' : power (self defined), arity=2.
    verbose : int
        Controls how much is printed; 0 is quietest.

    Returns
    -------
    string, float, float
    """
    try:
        df = create_dataset(equation_id, no_samples=no_samples,
                            input_range=input_range, save_path=save_path,
                            save=save, load=load).dropna()
        X = df.drop('target', axis=1)
        y = df['target']
    except:
        traceback.print_exc()
        print(f"Error on equation {equation_id}, skipping")
        return '', 0, 0

    no_samples = min(no_samples, len(y))

    default_func_set = ('add', 'sub', 'mul', 'div', 'log', 'sqrt', 'cos',
                        'tan', 'sin', 'abs', 'neg', 'inv', 'max', 'min')
    final_func_set = []
    for func in func_set:
        if func in default_func_set:
            final_func_set.append(func)
        else:
            if func == "pow":
                final_func_set.append(make_function(power, func, 2))
            elif func == "exp":
                final_func_set.append(make_function(exponent, func, 1))
            elif func == "pi":
                final_func_set.append(make_function(pi, func, 0))
            else:
                warnings.warn(
                    f"{func} is an unrecognized function, skipping it")

    est_gp = SymbolicRegressor(population_size=5000,
                               generations=10, stopping_criteria=0.01,
                               p_crossover=0.7, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               max_samples=0.9, function_set=final_func_set,
                               verbose=verbose, parsimony_coefficient=0.01,
                               random_state=0)
    start = time.time()
    est_gp.fit(X[:no_samples], y[:no_samples])
    end = time.time()
    # print(est_gp._program)

    dot_data = est_gp._program.export_graphviz()
    graph = graphviz.Source(dot_data)
    graph.render(f'images/gplearn/{equation_id}_estimate', format='png',
                 cleanup=True)

    return est_gp._program, est_gp.score(X, y), end - start
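# power and exponent are user-defined callables passed to make_function
# above but not shown in this snippet. A hedged sketch of protected
# versions in the spirit of gplearn's built-in protected operators (the
# clipping choices here are assumptions, not the original definitions).
# An arity-0 pi is omitted because gplearn validates custom functions by
# checking vector-in/vector-out behaviour.
import numpy as np

def power(x1, x2):
    # protected power: use |x1| as the base and zero out non-finite results
    with np.errstate(over='ignore', invalid='ignore'):
        result = np.power(np.abs(x1), x2)
    return np.where(np.isfinite(result), result, 0.)

def exponent(x1):
    # protected exp: clip the argument so np.exp cannot overflow
    return np.exp(np.clip(x1, -100., 100.))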