def test_verbose_output():
    """Check verbose=1 does not cause error.

    Fits a ``SymbolicRegressor`` with ``verbose=1`` while ``sys.stdout`` is
    redirected into an in-memory buffer, then checks the three header rows
    and counts the rows printed after them.
    """
    old_stdout = sys.stdout
    verbose_output = StringIO()
    sys.stdout = verbose_output
    try:
        est = SymbolicRegressor(random_state=0, verbose=1)
        est.fit(boston.data, boston.target)
    finally:
        # Restore the real stdout even when fit() raises; otherwise every
        # later test would keep printing into this buffer.
        sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    true_header = '%4s|%-25s|%-42s|' % (' ',
                                        'Population Average'.center(25),
                                        'Best Individual'.center(42))
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()
    header_fields = ('Gen', 'Length', 'Fitness', 'Length', 'Fitness',
                     'OOB Fitness', 'Time Left')
    true_header = '%4s %8s %16s %8s %16s %16s %10s' % header_fields
    assert_equal(true_header, header3)

    # Everything left in the buffer after the three header rows.
    n_lines = sum(1 for _ in verbose_output.readlines())
    assert_equal(20, n_lines)
def test_customized_regressor_metrics():
    """Check whether greater_is_better works for SymbolicRegressor.

    The same problem is solved twice: once with the built-in
    'mean absolute error' metric and once with a custom fitness that
    returns the negated error and is flagged ``greater_is_better=True``.
    Both runs are expected to print the same formula.
    """
    x_data = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_true = x_data[:, 0] ** 2 + x_data[:, 1] ** 2

    est_gp = SymbolicRegressor(metric='mean absolute error',
                               stopping_criteria=0.000001, random_state=415,
                               parsimony_coefficient=0.001, init_method='full',
                               init_depth=(2, 4))
    est_gp.fit(x_data, y_true)
    formula = str(est_gp)
    assert_equal('add(mul(X1, X1), mul(X0, X0))', formula, True)

    def neg_mean_absolute_error(y, y_pred, sample_weight):
        # sample_weight is passed by keyword: scikit-learn made it
        # keyword-only, so the positional form fails on current releases.
        return -1 * mean_absolute_error(y, y_pred,
                                        sample_weight=sample_weight)

    customized_fitness = make_fitness(neg_mean_absolute_error,
                                      greater_is_better=True)

    # The stopping criterion is negated to match the negated metric.
    c_est_gp = SymbolicRegressor(metric=customized_fitness,
                                 stopping_criteria=-0.000001, random_state=415,
                                 parsimony_coefficient=0.001, verbose=0,
                                 init_method='full', init_depth=(2, 4))
    c_est_gp.fit(x_data, y_true)
    c_formula = str(c_est_gp)
    assert_equal('add(mul(X1, X1), mul(X0, X0))', c_formula, True)
def test_none_const_range():
    """Check that const_range=None produces no constants"""

    def count_floats(fitted):
        """Count float nodes over every stored program of every generation."""
        return sum(
            1
            for generation in fitted._programs
            for program in generation
            if program is not None
            for element in program.program
            if isinstance(element, float)
        )

    # Check with None as const_range
    without_constants = SymbolicRegressor(population_size=100, generations=2,
                                          const_range=None)
    without_constants.fit(boston.data, boston.target)
    assert count_floats(without_constants) == 0

    # Check with default const_range
    with_constants = SymbolicRegressor(population_size=100, generations=2)
    with_constants.fit(boston.data, boston.target)
    assert count_floats(with_constants) > 1
def _gp_fit(arg):
    """Fit one SymbolicRegressor configuration and score it on a hold-out split.

    ``arg`` is indexed as ``(param, X, Y)``; ``param`` is indexed as
    ``(population_size, parsimony_coefficient, function names separated by
    single spaces, constant bound)``.

    Returns ``(param, program string, raw fitness, validation measures)`` on
    success. If fitting or scoring raises, returns the 3-tuple
    ``(param, "Exception: ...", 999999999)`` instead.
    """
    param, X, Y = arg[0], arg[1], arg[2]

    settings = dict(
        population_size=param[0],
        generations=450,
        parsimony_coefficient=param[1],
        function_set=param[2].split(" "),
        const_range=(-param[3], param[3]),
    )
    est_gp = SymbolicRegressor(**settings)

    training, validation = splitidx_srs(len(Y))
    X_train, Y_train = X[training], Y[training]
    X_validation, Y_validation = X[validation], Y[validation]

    try:
        est_gp.fit(X_train, Y_train)
        program_text = str(est_gp._program)
        raw_fitness = est_gp._program.raw_fitness_
        measures = regression_measures(est_gp.predict(X_validation),
                                       Y_validation)
    except Exception as e:
        # Best effort: report the failure as a result row instead of raising.
        return (param, "Exception: {}".format(str(e)), 999999999)
    return (param, program_text, raw_fitness, measures)
def test_verbose_output():
    """Check verbose=1 does not cause error.

    Captures what ``SymbolicRegressor(verbose=1).fit`` prints, verifies the
    three header rows, and counts the rows that follow them.
    """
    old_stdout = sys.stdout
    verbose_output = StringIO()
    sys.stdout = verbose_output
    try:
        est = SymbolicRegressor(random_state=0, verbose=1)
        est.fit(boston.data, boston.target)
    finally:
        # Put the real stdout back even if fit() fails, so a failure here
        # cannot swallow the output of the rest of the test run.
        sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    true_header = '%4s|%-25s|%-42s|' % (' ',
                                        'Population Average'.center(25),
                                        'Best Individual'.center(42))
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()
    header_fields = ('Gen', 'Length', 'Fitness', 'Length', 'Fitness',
                     'OOB Fitness', 'Time Left')
    true_header = '%4s %8s %16s %8s %16s %16s %10s' % header_fields
    assert_equal(true_header, header3)

    # Everything left in the buffer after the three header rows.
    n_lines = sum(1 for _ in verbose_output.readlines())
    assert_equal(10, n_lines)
def experiment(seed, i):
    """Run one seeded evolution, save its convergence plots, return the details.

    ``seed`` seeds the regressor and names the output file
    (``plot_<seed>.eps``); ``i`` only appears in the figure title. The
    remaining settings and the data (``pop_size``, ``num_generations``,
    ``crossover_prob``, ``mutation_prob``, ``function_set``, ``fit``, ``x``,
    ``y``) are read from the enclosing module.

    Returns the fitted estimator's ``run_details_``.
    """
    est_gp = SymbolicRegressor(
        population_size=pop_size,
        generations=num_generations,
        stopping_criteria=0.01,
        p_crossover=crossover_prob,
        p_subtree_mutation=mutation_prob,
        p_hoist_mutation=mutation_prob,
        p_point_mutation=mutation_prob,
        function_set=function_set,
        max_samples=0.9,
        verbose=1,
        metric=fit,
        random_state=seed,
    )
    est_gp.fit(x, y)

    # (subplot index, y-axis label, run_details_ key, extra plot kwargs)
    panels = (
        (1, 'Best fitness', 'best_fitness', {}),
        (2, 'Best size', 'best_length', {'color': 'red'}),
    )

    plt.figure(figsize=(14, 5))
    for position, y_label, detail_key, line_style in panels:
        plt.subplot(1, 2, position)
        plt.xlabel('Generations', fontsize=24)
        plt.ylabel(y_label, fontsize=24)
        plt.plot(est_gp.run_details_[detail_key], linewidth=3.0, **line_style)
        plt.grid()

    plt.suptitle('Run {}'.format(i), fontsize=24)
    plt.savefig('plot_{}.eps'.format(seed))
    return est_gp.run_details_
def test_more_verbose_output():
    """Check verbose=2 does not cause error.

    Both ``sys.stdout`` and ``sys.stderr`` are redirected into buffers while
    the estimator is fitted; the test then counts the rows written to each.
    """
    old_stdout, old_stderr = sys.stdout, sys.stderr
    verbose_output = StringIO()
    joblib_output = StringIO()
    sys.stdout = verbose_output
    sys.stderr = joblib_output
    try:
        est = SymbolicRegressor(random_state=0, verbose=2)
        est.fit(boston.data, boston.target)
    finally:
        # Restore both streams even if fit() raises, so a failure cannot
        # leave the interpreter writing into these buffers.
        sys.stdout = old_stdout
        sys.stderr = old_stderr

    # check output
    verbose_output.seek(0)
    # Skip the three header rows; their content is not checked here.
    for _ in range(3):
        verbose_output.readline()
    n_lines = sum(1 for _ in verbose_output.readlines())
    assert_equal(10, n_lines)

    joblib_output.seek(0)
    n_lines = sum(1 for _ in joblib_output.readlines())
    assert_equal(20, n_lines)
def getSymbolicRegressorModel():
    """Train a SymbolicRegressor on a synthetic target and return its program.

    The training set is 50 two-feature samples drawn uniformly from
    [-1, 1) with a generator seeded with 0, labelled with
    ``x0**2 - x1**2 + x1 - 1``.

    Returns the fitted estimator's ``_program`` attribute.
    """
    rng = check_random_state(0)

    # Training samples. The original also drew an unused test set after
    # these; dropping it does not change the draws below because the
    # training samples are generated first.
    X_train = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_train = X_train[:, 0]**2 - X_train[:, 1]**2 + X_train[:, 1] - 1

    est_gp = SymbolicRegressor(
        population_size=5000,
        generations=20,
        stopping_criteria=0.01,
        p_crossover=0.7,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.1,
        max_samples=0.9,
        verbose=1,
        parsimony_coefficient=0.01,
        random_state=0,
    )
    est_gp.fit(X_train, y_train)
    return est_gp._program
def test_verbose_output():
    """Check verbose=1 does not cause error.

    Captures what ``SymbolicRegressor(verbose=1).fit`` prints, verifies the
    three header rows, and counts the rows that follow them.
    """
    old_stdout = sys.stdout
    verbose_output = StringIO()
    sys.stdout = verbose_output
    try:
        est = SymbolicRegressor(random_state=0, verbose=1)
        est.fit(boston.data, boston.target)
    finally:
        # Restore the real stdout even when fit() raises.
        sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    # The leading blank column is written with an explicit width of 4 so it
    # lines up with the 4-wide first column of the separator and field rows
    # checked below.
    # NOTE(review): the literal previously showed a single leading space;
    # confirm the estimator pads this column to 4 characters.
    true_header = '{:4}|{:^25}|{:^42}|'.format('', 'Population Average',
                                               'Best Individual')
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()
    line_format = '{:>4} {:>8} {:>16} {:>8} {:>16} {:>16} {:>10}'
    true_header = line_format.format('Gen', 'Length', 'Fitness', 'Length',
                                     'Fitness', 'OOB Fitness', 'Time Left')
    assert_equal(true_header, header3)

    # Everything left in the buffer after the three header rows.
    n_lines = sum(1 for _ in verbose_output.readlines())
    assert_equal(20, n_lines)
def test_more_verbose_output():
    """Check verbose=2 does not cause error.

    Both ``sys.stdout`` and ``sys.stderr`` are redirected into buffers while
    the estimator is fitted; the test then counts the rows written to each.
    """
    old_stdout, old_stderr = sys.stdout, sys.stderr
    verbose_output = StringIO()
    joblib_output = StringIO()
    sys.stdout = verbose_output
    sys.stderr = joblib_output
    try:
        est = SymbolicRegressor(random_state=0, verbose=2)
        est.fit(boston.data, boston.target)
    finally:
        # Restore both streams even if fit() raises.
        sys.stdout = old_stdout
        sys.stderr = old_stderr

    # check output
    verbose_output.seek(0)
    # Skip the three header rows; their content is not checked here.
    for _ in range(3):
        verbose_output.readline()
    n_lines = sum(1 for _ in verbose_output.readlines())
    assert_equal(20, n_lines)

    joblib_output.seek(0)
    n_lines = sum(1 for _ in joblib_output.readlines())
    # New version of joblib appears to output sys.stderr
    assert_equal(0, n_lines % 10)
def main():
    """Fit a SymbolicRegressor to a two-column data file and print the program.

    The file name is taken from ``sys.argv[1]``. Column 0 is used as the
    single input feature and column 1 as the target. Exits with status 1
    when no file name is given.
    """
    if len(sys.argv) < 2:
        print("Provide data file name!")
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. under ``python -S``).
        sys.exit(1)

    filename = sys.argv[1]

    # Training samples: wrap the parsed values in float arrays; the inputs
    # become a single-feature column.
    x = read_nth_column(0, filename)
    x_train = np.ndarray((len(x), ),
                         buffer=np.array(x, dtype=float)).reshape(-1, 1)

    y = read_nth_column(1, filename)
    y_train = np.ndarray((len(y), ), buffer=np.array(y, dtype=float))

    # The original re-read both columns into unused "test" variables; those
    # redundant file reads were dropped.

    est_gp = SymbolicRegressor(population_size=5000,
                               generations=30,
                               p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1,
                               verbose=1,
                               parsimony_coefficient=0.01,
                               random_state=0,
                               function_set=('add', 'sub', 'mul', 'div',
                                             'sin', 'cos', 'sqrt', 'log'))
    est_gp.fit(x_train, y_train)
    print(est_gp._program)
def test_low_memory():
    """Check the low_memory functionality works as expected."""
    regressor = SymbolicRegressor(low_memory=True, random_state=56,
                                  generations=10)
    regressor.fit(boston.data, boston.target)

    # Check there are no parents: the second-to-last stored generation
    # must be None.
    parent_generation = regressor._programs[-2]
    assert_true(parent_generation is None)
def best_approximate(L,points):
    """Fit a symbolic regressor to the interior items of ``L``; predict ``points``.

    Each item of ``L`` is indexed as ``(target, input)``: ``item[1]`` is
    used as the model input and ``item[0]`` as the value to fit. The first
    and the last item of ``L`` are left out. The remaining data is printed,
    split 67/33 with ``train_test_split`` and only the training part is
    used for fitting.

    Returns the model's predictions for ``points``.
    """
    # Drop the first and last item with a slice instead of pop()/pop(0).
    datax = [item[1] for item in L][1:-1]
    datay = [item[0] for item in L][1:-1]
    print(datax)
    print(datay)

    # No random_state is passed here or to the regressor, so both the split
    # and the evolution differ from run to run.
    X_train, X_test, y_train, y_test = train_test_split(datax, datay,
                                                        test_size=0.33)

    sr = SymbolicRegressor(
        population_size=500,
        generations=20,
        # stop once the best solution's error drops below this; no metric
        # is passed, so the estimator's default metric applies
        stopping_criteria=0.01,
        # functions that the symbolic regression can use
        function_set=('add', 'sub', 'mul', 'div'),
        # probabilities of activation of the different genetic operators
        p_crossover=0.54,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.3,
        verbose=1,  # print progress to screen
    )

    # launch the evolution
    sr.fit(X_train, y_train)
    Ypred = sr.predict(points)
    return Ypred
def test_none_const_range():
    """Check that const_range=None produces no constants"""

    def count_floats(fitted):
        """Count float nodes over every stored program of every generation.

        Uses isinstance() rather than ``type(x) == float`` so float
        subclasses are counted as constants too.
        """
        return sum(
            1
            for generation in fitted._programs
            for program in generation
            if program is not None
            for element in program.program
            if isinstance(element, float)
        )

    # Check with None as const_range
    est = SymbolicRegressor(const_range=None, generations=2)
    est.fit(boston.data, boston.target)
    assert_true(count_floats(est) == 0)

    # Check with default const_range
    est = SymbolicRegressor(generations=2)
    est.fit(boston.data, boston.target)
    assert_true(count_floats(est) > 1)
def run(dataset_train, dataset_test, population_size, generations,
        p_crossover, p_subtree_mutation, p_point_mutation, p_hoist_mutation):
    """Fit a SymbolicRegressor and return its (train RMSE, test RMSE).

    In both datasets the last column is the target and all preceding
    columns are the features. The four probabilities, the population size
    and the generation count are forwarded to the regressor unchanged.
    """

    def split_features_target(dataset):
        return dataset[:, :-1], dataset[:, -1]

    Xtrain, ytrain = split_features_target(dataset_train)
    Xtest, ytest = split_features_target(dataset_test)

    # Built-in primitives by name plus the project's own function objects.
    f_set = ('add', 'sub', 'mul', myDiv, 'sin', 'cos',
             myLog, mySqrt, myTanh, myExp)

    est_gp = SymbolicRegressor(
        population_size=population_size,
        generations=generations,
        stopping_criteria=0.01,
        p_crossover=p_crossover,
        p_subtree_mutation=p_subtree_mutation,
        p_hoist_mutation=p_hoist_mutation,
        p_point_mutation=p_point_mutation,
        max_samples=1.0,
        verbose=0,
        parsimony_coefficient=0.05,
        function_set=f_set,
        n_jobs=1,
    )
    est_gp.fit(Xtrain, ytrain)

    train_rmse = RMSE(est_gp.predict(Xtrain), ytrain)
    test_rmse = RMSE(est_gp.predict(Xtest), ytest)
    return train_rmse, test_rmse
def symbolic_regressor(f, npoints, xrange):
    """Recover a symbolic expression for ``f`` and report its R-squared.

    ``f`` is sampled at ``npoints`` evenly spaced inputs between
    ``xrange[0]`` and ``xrange[1]``. A SymbolicRegressor is fitted to the
    samples, its best program is converted to a simplified sympy expression
    in ``x``, and that expression is evaluated at the same inputs.

    Returns ``(sym_reg, R2_perf)``: the sympy expression and the value of
    ``compute_Rsquared`` for the true versus reconstructed outputs.
    """
    X = np.linspace(xrange[0], xrange[1], npoints).reshape((-1, 1))
    y = f(X)

    est_gp = SymbolicRegressor(population_size=5000,
                               generations=20, stopping_criteria=0.01,
                               p_crossover=0.7, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               max_samples=0.9, verbose=1,
                               parsimony_coefficient=0.01, random_state=0)
    est_gp.fit(X, y)
    sym_expr = str(est_gp._program)

    # Maps the function names found in the program string to Python
    # operators so sympify can build an expression from it.
    converter = {
        'sub': lambda x, y: x - y,
        'div': lambda x, y: x / y,
        'mul': lambda x, y: x * y,
        'add': lambda x, y: x + y,
        'neg': lambda x: -x,
        'pow': lambda x, y: x ** y
    }

    x, X0 = symbols('x X0')
    sym_reg = simplify(sympify(sym_expr, locals=converter))
    sym_reg = sym_reg.subs(X0, x)

    Y_true = y.reshape((-1, 1))

    # Parse the expression once instead of once per sample; the string
    # round-trip is loop-invariant.
    evaluable = sympify(str(sym_reg))
    Y_est = np.array([evaluable.subs(x, sample)
                      for sample in X]).reshape((-1, 1))

    R2_perf = compute_Rsquared(Y_true, Y_est)
    return sym_reg, R2_perf
def train():
    """Fit a SymbolicRegressor on the module-level training data.

    Reads ``X_train`` and ``y_train`` from the enclosing module, then prints
    the best program and the estimator's score on that same training data.
    """
    settings = {
        'population_size': 150,
        'generations': 20,
        'stopping_criteria': 0.001,
        'p_crossover': 0.8,
        'p_subtree_mutation': 0.1,
        'p_hoist_mutation': 0.05,
        'p_point_mutation': 0.05,
        'max_samples': 0.9,
        'verbose': 1,
        'metric': 'mean absolute error',
        'parsimony_coefficient': 0.01,
    }
    est_gp = SymbolicRegressor(**settings)
    est_gp.fit(X_train, y_train)

    print(est_gp._program)
    training_score = est_gp.score(X_train, y_train)
    print(training_score)
def test_early_stopping():
    """Check that early stopping works"""
    train_X = boston.data[:400, :]
    train_y = boston.target[:400]

    # With these stopping criteria each estimator is expected to keep
    # exactly one generation of programs.
    regressor = SymbolicRegressor(stopping_criteria=10, random_state=0)
    regressor.fit(train_X, train_y)
    assert_true(len(regressor._programs) == 1)

    transformer = SymbolicTransformer(stopping_criteria=0.5, random_state=0)
    transformer.fit(train_X, train_y)
    assert_true(len(transformer._programs) == 1)
def train(x, y_truth, X_train, y_train, X_test, y_test, target_func,
          noise_rate, noise_level):
    """
    Fit a symbolic regressor on all of the training data and plot the result.

    x: range over which the target function is plotted
    y_truth: true values of the target function
    X_train: training data
    y_train: training targets (with noise)
    X_test: test data
    y_test: test targets
    target_func: legend label used for the true curve
    noise_rate: noise rate (not used in this function)
    noise_level: noise level (not used in this function)

    The fit uses every training sample, so the result may be affected by
    the noisy data.
    """
    # Show the data used for training
    print('---训练数据---')
    print(np.c_[X_train, y_train])

    # Define the symbolic regressor
    est_gp = SymbolicRegressor(population_size=5000,
                               function_set=['add', 'sub', 'mul', 'div'],
                               generations=10, stopping_criteria=0.01,
                               p_crossover=0.7, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               max_samples=0.9, verbose=1,
                               metric='mean absolute error',
                               parsimony_coefficient=0.01, random_state=0,
                               const_range=(-1, 1))

    # Fit on the training set (inputs reshaped to a single-feature column)
    est_gp.fit(X_train.reshape(-1, 1), y_train)

    test_inputs = X_test.reshape(-1, 1)
    program_text = str(est_gp._program)

    # Predictions for the test data
    y_pred = est_gp.predict(test_inputs)
    # R^2 on the test data
    score_gp = est_gp.score(test_inputs, y_test)
    # Mean squared error on the test data (not the training set)
    test_mse = mean_squared_error(y_test, y_pred)

    print('拟合结果', program_text)
    print('R^2 : %.6f' % score_gp)
    print('MSE : %.6f' % test_mse)

    # Plot the target curve
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)
    plt.plot(x, y_truth, label=target_func)

    # Plot the training data set
    plt.scatter(X_train, y_train, label='NoisyData', alpha=0.9)

    # Plot the fitted curve, with rows sorted by x (then by prediction) so
    # the line is drawn left to right
    data = np.c_[X_test, y_pred]
    data = data[np.lexsort(data[:, ::-1].T)]
    plt.plot(data[:, 0], data[:, 1], label='GP : ' + program_text)

    # Title. A raw string keeps the same text while avoiding the invalid
    # "\/" escape sequence that newer Python versions warn about.
    fmt = r'$R^2 =\/ {0:.6f}$ , $MSE =\/ {1:.6f}$'.format(score_gp, test_mse)
    plt.title(fmt, fontproperties='SimHei', fontsize=20)

    # One legend call after all artists exist; each call replaces the
    # previous legend, so the earlier intermediate calls were redundant.
    plt.legend(loc='best', fontsize=18)
def regressionOfFailureRate(coords, seed=None, population_size=None, generations=None):
    """
    Try to fit the points `coords`, which express the failure frequency, as
    well as possible. While doing so it tries to keep the result integrable,
    but integrability is not guaranteed.

    `coords` is iterated as (x, y) pairs. `population_size` defaults to 1000
    and `generations` to 20 when None. `seed` is passed to the regressor as
    `random_state`.

    Returns a pair: the fitted estimator and the result of
    `extractExprFromGplearn` applied to the best individual's program.
    """
    if population_size is None:
        population_size = 1000
    if generations is None:
        generations = 20
    # Split the x and y coordinates for GpLearn; each x is wrapped in a
    # one-element list so every sample has a single feature.
    X_train, y_train = zip(*(([x], y) for (x, y) in coords))
    from gplearn.genetic import SymbolicRegressor
    # How many random numbers does gplearn generate? It is not limited.
    # Either a function or a variable is substituted, or a random number
    # from the given interval is generated.
    est_gp = SymbolicRegressor(  # Estimator Genetic Programming
        population_size=population_size,
        generations=1,  # only the initial generation; the rest is evolved below
        tournament_size=20,
        stopping_criteria=0.0,
        const_range=(0.0, 5.0),
        init_depth=(2, 6),
        init_method='half and half',
        function_set=('add', 'mul'),
        metric='mean absolute error',
        # metric=sum_absolute_error
        parsimony_coefficient=0.001,
        p_crossover=0.9,
        p_subtree_mutation=0.01,
        p_hoist_mutation=0.01,
        p_point_mutation=0.01,
        p_point_replace=0.05,
        max_samples=1.0,
        warm_start=False,
        n_jobs=-1,
        verbose=VERBOSITY,
        random_state=seed)
    est_gp.fit(X_train, y_train)
    for p in est_gp._programs[0]:
        p.program[0] = gplearn.functions.div2  # Rewrite every root node to division
    # Evolve one generation at a time with warm_start so the previous
    # generation can be patched before each step.
    for i in range(1, generations):
        for p in est_gp._programs[i - 1]:
            # Forbid all offspring from crossover at the root.
            # NOTE(review): relies on the external `get_subtree` helper
            # enforcing this - confirm against its definition.
            p.get_subtree = functools.partial(get_subtree, p)
        est_gp.set_params(generations=i + 1, warm_start=True)
        est_gp.fit(X_train, y_train)
    best_individual = est_gp._program
    return est_gp, extractExprFromGplearn(best_individual.program)
def test_run_details():
    """Check the run_details_ attribute works as expected."""
    regressor = SymbolicRegressor(generations=5, random_state=415)
    regressor.fit(boston.data, boston.target)

    # Check generations are indexed as expected without warm_start
    first_pass = list(range(5))
    assert_equal(regressor.run_details_['generation'], first_pass)

    regressor.set_params(generations=10, warm_start=True)
    regressor.fit(boston.data, boston.target)

    # Check generations are indexed as expected with warm_start
    both_passes = list(range(10))
    assert_equal(regressor.run_details_['generation'], both_passes)

    # Check all details have expected number of elements
    for detail_name in regressor.run_details_:
        recorded = regressor.run_details_[detail_name]
        assert_equal(len(recorded), 10)
def test_subsample():
    """Check that subsample work and that results differ"""
    train_X, train_y = boston.data[:400, :], boston.target[:400]
    test_X, test_y = boston.data[400:, :], boston.target[400:]

    def holdout_error(estimator):
        """Fit on the first 400 rows; return the error on the remainder."""
        estimator.fit(train_X, train_y)
        return mean_absolute_error(estimator.predict(test_X), test_y)

    all_samples = holdout_error(
        SymbolicRegressor(max_samples=1.0, random_state=0))
    subsampled = holdout_error(
        SymbolicRegressor(max_samples=0.7, random_state=0))

    assert_true(abs(all_samples - subsampled) > 0.01)
def test_trigonometric():
    """Check that using trig functions work and that results differ"""
    train_X, train_y = boston.data[:400, :], boston.target[:400]
    test_X, test_y = boston.data[400:, :], boston.target[400:]

    def holdout_error(estimator):
        """Fit on the first 400 rows; return the error on the remainder."""
        estimator.fit(train_X, train_y)
        return mean_absolute_error(estimator.predict(test_X), test_y)

    plain = holdout_error(SymbolicRegressor(random_state=0))
    with_trig = holdout_error(
        SymbolicRegressor(trigonometric=True, random_state=0))

    assert_true(abs(plain - with_trig) > 0.01)
def test_parsimony_coefficient():
    """Check that parsimony coefficients work and that results differ"""
    train_X, train_y = boston.data[:400, :], boston.target[:400]
    test_X, test_y = boston.data[400:, :], boston.target[400:]

    def holdout_error(coefficient):
        """Fit with the given coefficient; return the held-out error."""
        estimator = SymbolicRegressor(population_size=100, generations=2,
                                      parsimony_coefficient=coefficient,
                                      random_state=0)
        estimator.fit(train_X, train_y)
        return mean_absolute_error(estimator.predict(test_X), test_y)

    fixed = holdout_error(0.001)
    automatic = holdout_error('auto')

    assert abs(fixed - automatic) > 0.01
def test_trigonometric():
    """Check that using trig functions work and that results differ"""
    train_X, train_y = boston.data[:400, :], boston.target[:400]
    test_X, test_y = boston.data[400:, :], boston.target[400:]

    def holdout_error(estimator):
        """Fit on the first 400 rows; return the error on the remainder."""
        estimator.fit(train_X, train_y)
        return mean_absolute_error(estimator.predict(test_X), test_y)

    plain = holdout_error(SymbolicRegressor(random_state=0))

    trig_functions = ['add', 'sub', 'mul', 'div', 'sin', 'cos', 'tan']
    with_trig = holdout_error(
        SymbolicRegressor(function_set=trig_functions, random_state=0))

    assert_true(abs(plain - with_trig) > 0.01)
def test_input_shape():
    """Check changed dimensions cause failure"""
    random_state = check_random_state(415)
    # Fit on 10 features, then offer 9 features to the fitted estimator.
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)
    X2 = np.reshape(random_state.uniform(size=45), (5, 9))

    # Check the regressor
    regressor = SymbolicRegressor(generations=2, random_state=0)
    regressor.fit(X, y)
    assert_raises(ValueError, regressor.predict, X2)

    # Check the transformer
    transformer = SymbolicTransformer(generations=2, random_state=0)
    transformer.fit(X, y)
    assert_raises(ValueError, transformer.transform, X2)
def RunSR(X_train, y_train, X_test, weights, SR_metric, sqrt, rand_state):
    """Fit a SymbolicRegressor, optionally with sample weights, and return it.

    ``sqrt == 1`` adds 'sqrt' to the four arithmetic operators. When
    ``weights > 0`` a weight vector from ``GenerateWeights(X_train, weights)``
    is passed to ``fit``. ``X_test`` is accepted but not used here. The best
    program is printed before the fitted estimator is returned.
    """
    operators_set = ['add', 'sub', 'mul', 'div']
    if sqrt == 1:
        operators_set = operators_set + ['sqrt']

    SR = SymbolicRegressor(
        population_size=5000,
        generations=20,
        stopping_criteria=0.01,
        p_crossover=0.65,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.1,
        max_samples=1,
        verbose=0,
        function_set=operators_set,
        parsimony_coefficient=0.001,
        random_state=rand_state,
        init_depth=(2, 6),
        tournament_size=20,
        metric=SR_metric,
    )

    fit_arguments = (X_train, y_train)
    if weights > 0:
        fit_arguments += (GenerateWeights(X_train, weights),)
    SR.fit(*fit_arguments)

    print(SR._program)
    return SR
def main():
    """Fit three regressors to the CSV training data and plot them in a grid.

    Loads ``x_train.csv`` (reshaped to 1000 single-feature rows) and
    ``y_train.csv``, fits a SymbolicRegressor, a DecisionTreeRegressor and a
    RandomForestRegressor, prints the symbolic program, and draws the ground
    truth plus each model's predictions on a 2x2 grid of subplots.
    """
    x = np.genfromtxt('x_train.csv', delimiter=',').reshape((1000, 1))
    y = np.genfromtxt('y_train.csv', delimiter=',')

    est_gp = SymbolicRegressor(
        population_size=50,
        generations=20,
        stopping_criteria=0.01,
        p_crossover=0.7,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.1,
        max_samples=0.9,
        verbose=1,
        parsimony_coefficient=0.01,
        random_state=0,
    )
    est_gp.fit(x, y)
    print(est_gp._program)

    est_tree = DecisionTreeRegressor()
    est_tree.fit(x, y)
    est_rf = RandomForestRegressor()
    est_rf.fit(x, y)

    axis_values = np.arange(-1, 1, 1 / 10.)
    x0, x1 = np.meshgrid(axis_values, axis_values)
    y_truth = 3 * x0**2 + 5 * x0 + 1  # exact function we are estimating

    grid_inputs = np.c_[x0.ravel()]
    panels = [(y_truth, None, "Ground Truth")]
    for estimator, label in ((est_gp, "SymbolicRegressor"),
                             (est_tree, "DecisionTreeRegressor"),
                             (est_rf, "RandomForestRegressor")):
        predicted = estimator.predict(grid_inputs).reshape(x0.shape)
        panels.append((predicted, estimator.score(x, y), label))

    # Only the values are drawn; the score and label of each panel are
    # carried along but not rendered.
    for slot, (values, _score, _label) in enumerate(panels, start=1):
        plt.subplot(2, 2, slot)
        plt.plot(x0, values, 'C0o')
        plt.grid(True, which='both')
        plt.axhline(y=0, color='k')
        plt.axvline(x=0, color='k')
    plt.show()
def pca_gp(rows, features, function):
    """Run PCA followed by symbolic regression several times; collect fitness.

    For each run number in ``range(NUMBER_OF_RUNS)`` a dataset of
    ``rows // features`` samples with ``features`` columns is drawn uniformly
    from [-1, 1) (seeded with the run number) and labelled with
    ``function(X)``. The training part is projected onto one principal
    component and a SymbolicRegressor is fitted to that projection.

    Returns a dict mapping run number to a list with one single-entry dict
    per stored generation: ``{generation index: lowest raw fitness}``
    (``math.inf`` when the generation holds no programs).
    """
    run_results = {}
    for run_number in range(0, NUMBER_OF_RUNS):
        # Generating random data
        rng = check_random_state(run_number)
        X = rng.uniform(-1, 1, rows).reshape(rows // features, features)
        Y = function(X)

        # Dividing it into training and test set (only the training part
        # is used below)
        X_train, _, y_train, _ = train_test_split(
            X, Y, test_size=TEST_SIZE, random_state=0)

        # Convert it to PCA
        pca = PCA(n_components=1)
        X_train_pca = pca.fit_transform(X_train)

        # Training the system
        est_gp = SymbolicRegressor(population_size=POPULATION_SIZE,
                                   generations=NUMBER_OF_GENERATION,
                                   stopping_criteria=0.01,
                                   p_crossover=0.7, p_subtree_mutation=0.1,
                                   p_hoist_mutation=0.05,
                                   p_point_mutation=0.1,
                                   max_samples=0.9, verbose=1,
                                   parsimony_coefficient=0.01,
                                   random_state=0, n_jobs=-1)
        est_gp.fit(X_train_pca, y_train)

        generation_results = []
        for idGen, generation in enumerate(est_gp._programs):
            best_fitness = math.inf
            for idPopulation in range(est_gp.population_size):
                program = generation[idPopulation]
                # Identity check: ``!= None`` would go through the
                # program's own comparison operators.
                if program is None:
                    continue
                # Explicit ``<`` comparison (rather than min()) so a NaN
                # fitness never replaces the current best.
                if program.raw_fitness_ < best_fitness:
                    best_fitness = program.raw_fitness_
            generation_results.append({idGen: best_fitness})

        run_results[run_number] = generation_results
    return run_results
def main():
    """Fit a SymbolicRegressor to the two-column data set and print the program.

    Column 0 of ``read_data()`` is the single input feature and column 1
    the target.
    """
    data = read_data()

    settings = dict(
        population_size=1000,
        generations=100,
        const_range=(.0, .0),
        init_depth=(2, 10),
        init_method='grow',
        function_set=('add', 'sub', 'mul', 'div', 'log', 'sin', 'cos'),
        p_crossover=0.7,
        p_subtree_mutation=0.0,
        p_hoist_mutation=0.0,
        p_point_mutation=0.0,
        verbose=1,
        n_jobs=-1,
    )
    regressor = SymbolicRegressor(**settings)

    n_rows, _ = data.shape
    inputs = data[:, 0].reshape(n_rows, 1)
    targets = data[:, 1]
    regressor.fit(inputs, targets)

    print(regressor._program)
def test_verbose_with_oob():
    """Check oob scoring for subsample does not cause error.

    Fits with ``max_samples=0.9`` and ``verbose=1`` while stdout is captured,
    then counts the rows printed after the three header rows.
    """
    old_stdout = sys.stdout
    verbose_output = StringIO()
    sys.stdout = verbose_output
    try:
        est = SymbolicRegressor(max_samples=0.9, random_state=0, verbose=1)
        est.fit(boston.data, boston.target)
    finally:
        # Restore the real stdout even when fit() raises.
        sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    # Skip the three header rows; their content is not checked here.
    for _ in range(3):
        verbose_output.readline()
    n_lines = sum(1 for _ in verbose_output.readlines())
    assert_equal(20, n_lines)
def test_verbose_with_oob():
    """Check oob scoring for subsample does not cause error.

    Fits with ``max_samples=0.9`` and ``verbose=1`` while stdout is captured,
    then counts the rows printed after the three header rows.
    """
    old_stdout = sys.stdout
    verbose_output = StringIO()
    sys.stdout = verbose_output
    try:
        est = SymbolicRegressor(max_samples=0.9, random_state=0, verbose=1)
        est.fit(boston.data, boston.target)
    finally:
        # Restore the real stdout even when fit() raises.
        sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    # Skip the three header rows; their content is not checked here.
    for _ in range(3):
        verbose_output.readline()
    n_lines = sum(1 for _ in verbose_output.readlines())
    assert_equal(10, n_lines)
def fit(self, x_data):
    """Evolve a program over ``x_data`` using this object's custom fitness.

    The fitness comes from ``self.make_explict_func()`` wrapped by
    ``make_fitness`` with its second argument set to False, and the
    primitives come from ``self.function_set``. The "targets" handed to the
    regressor are simply the row indices ``0 .. len(x_data) - 1``.

    Returns the fitted SymbolicRegressor.
    """
    fitness = make_fitness(self.make_explict_func(), False)
    est_gp = SymbolicRegressor(
        population_size=500,
        generations=10,
        stopping_criteria=0.0001,
        p_crossover=0.7,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.1,
        metric=fitness,
        function_set=self.function_set,
        verbose=1,
        parsimony_coefficient=0.01,
    )
    row_indices = np.arange(x_data.shape[0])
    est_gp.fit(x_data, row_indices)
    return est_gp
def test_verbose_with_oob():
    """Check oob scoring for subsample does not cause error.

    Fits a small run (``population_size=100``, ``generations=10``) with
    ``max_samples=0.9`` and ``verbose=1`` while stdout is captured, then
    counts the rows printed after the three header rows.
    """
    old_stdout = sys.stdout
    verbose_output = StringIO()
    sys.stdout = verbose_output
    try:
        est = SymbolicRegressor(population_size=100, generations=10,
                                max_samples=0.9, random_state=0, verbose=1)
        est.fit(boston.data, boston.target)
    finally:
        # Restore the real stdout even when fit() raises.
        sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    # Ignore header rows
    for _ in range(3):
        verbose_output.readline()
    n_lines = sum(1 for _ in verbose_output.readlines())
    assert_equal(10, n_lines)
def test_sample_weight():
    """Check sample_weight param works"""
    # Check constant sample_weight has no effect
    sample_weight = np.ones(boston.target.shape[0])

    unweighted = SymbolicRegressor(generations=2, random_state=0)
    unweighted.fit(boston.data, boston.target)

    unit_weighted = SymbolicRegressor(generations=2, random_state=0)
    unit_weighted.fit(boston.data, boston.target,
                      sample_weight=sample_weight)

    # And again with a scaled sample_weight
    scaled_weighted = SymbolicRegressor(generations=2, random_state=0)
    scaled_weighted.fit(boston.data, boston.target,
                        sample_weight=sample_weight * 1.1)

    reference_fitness = unweighted._program.fitness_
    assert_almost_equal(reference_fitness, unit_weighted._program.fitness_)
    assert_almost_equal(reference_fitness, scaled_weighted._program.fitness_)

    # And again for the transformer
    sample_weight = np.ones(boston.target.shape[0])

    plain_features = SymbolicTransformer(
        generations=2, random_state=0
    ).fit_transform(boston.data, boston.target)

    unit_features = SymbolicTransformer(
        generations=2, random_state=0
    ).fit_transform(boston.data, boston.target, sample_weight=sample_weight)

    # And again with a scaled sample_weight
    scaled_features = SymbolicTransformer(
        generations=2, random_state=0
    ).fit_transform(boston.data, boston.target,
                    sample_weight=sample_weight * 1.1)

    assert_array_almost_equal(plain_features, unit_features)
    assert_array_almost_equal(plain_features, scaled_features)
def test_pickle():
    """Check pickability"""
    train_X, train_y = boston.data[:100, :], boston.target[:100]
    held_X, held_y = boston.data[500:, :], boston.target[500:]

    def round_trip(estimator):
        """Serialise and restore the estimator, checking its type survives."""
        restored = pickle.loads(pickle.dumps(estimator))
        assert_equal(type(restored), estimator.__class__)
        return restored

    # Check the regressor
    regressor = SymbolicRegressor(generations=2, random_state=0)
    regressor.fit(train_X, train_y)
    score = regressor.score(held_X, held_y)
    regressor_copy = round_trip(regressor)
    assert_equal(score, regressor_copy.score(held_X, held_y))

    # Check the transformer
    transformer = SymbolicTransformer(generations=2, random_state=0)
    transformer.fit(train_X, train_y)
    X_new = transformer.transform(held_X)
    transformer_copy = round_trip(transformer)
    assert_array_almost_equal(X_new, transformer_copy.transform(held_X))
def test_parsimony_coefficient():
    """Check that parsimony coefficients work and that results differ"""
    train_X, train_y = boston.data[:400, :], boston.target[:400]
    test_X, test_y = boston.data[400:, :], boston.target[400:]

    def holdout_error(coefficient):
        """Fit with the given coefficient; return the held-out error."""
        estimator = SymbolicRegressor(parsimony_coefficient=coefficient,
                                      random_state=0)
        estimator.fit(train_X, train_y)
        return mean_absolute_error(estimator.predict(test_X), test_y)

    small, large, automatic = [holdout_error(coefficient)
                               for coefficient in (0.001, 0.1, 'auto')]

    # Every pair of settings must give a visibly different error.
    assert_true(abs(small - large) > 0.01)
    assert_true(abs(small - automatic) > 0.01)
    assert_true(abs(large - automatic) > 0.01)
def test_print_overloading_estimator():
    """Check that printing a fitted estimator results in 'pretty' output"""

    def printed(value):
        """Return the stripped text that ``print(value)`` writes to stdout."""
        saved_stdout = sys.stdout
        try:
            buffer = StringIO()
            sys.stdout = buffer
            print(value)
            return buffer.getvalue().strip()
        finally:
            sys.stdout = saved_stdout

    random_state = check_random_state(415)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)

    # Unfitted
    output_unfitted = printed(est)

    # Fitted
    est.fit(X, y)
    output_fitted = printed(est)
    output_program = printed(est._program)

    assert_true(output_unfitted != output_fitted)
    assert_true(output_unfitted == est.__repr__())
    assert_true(output_fitted == output_program)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)

    # Unfitted
    output_unfitted = printed(est)

    # Fitted
    est.fit(X, y)
    output_fitted = printed(est)

    # Expected layout: the list of program strings with the quotes removed
    # and a line break after each separating comma.
    output = str([gp.__str__() for gp in est])
    output_program = printed(output.replace("',", ",\n").replace("'", ""))

    assert_true(output_unfitted != output_fitted)
    assert_true(output_unfitted == est.__repr__())
    assert_true(output_fitted == output_program)
def main():
    """Fetch athlete scores, fit a SymbolicRegressor, print ranked predictions.

    Each athlete's score row is left-padded with zeros to a common length;
    the last value of a row is the regression target and the preceding
    values are the features. Predictions are printed as quoted CSV rows in
    descending order. A row is starred when the athlete's status is
    ``Status.Probable`` and the quota for the athlete's position (see
    ``escalation``) is not yet used up.
    """
    # Remaining number of starred picks per position.
    escalation = {
        Position.GOALKEEPER: 1,
        Position.DEFENDER: 2,
        Position.SIDE: 2,
        Position.MIDFIELD: 4,
        Position.ATTACKER: 2,
        Position.COACH: 1
    }

    print("Getting auth")
    auth = get_auth()
    print("Getting teams")
    teams = get_teams()
    print("Getting athletes")
    athletes = get_athletes(teams)
    print("Getting scores")
    scores = [athlete.get_row(auth) for athlete in athletes]

    # Left-pad every row with zeros up to the longest row.
    max_length = max((len(score) for score in scores), default=0)
    fixed_score = [[0.0] * (max_length - len(score)) + score
                   for score in scores]

    generations = 2000
    print("Training using " + str(generations) + " generations. It can take a long time to end")
    est_gp = SymbolicRegressor(
        population_size=5000,
        generations=generations,
        stopping_criteria=0.01,
        p_crossover=0.7,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.1,
        max_samples=0.9,
        verbose=1,
        parsimony_coefficient=0.01,
        random_state=0,
        const_range=(-50., 50.),
        function_set=('add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs',
                      'neg', 'inv', 'max', 'min', 'sin', 'cos', 'tan'))

    features = [row[:-1] for row in fixed_score]
    targets = [row[-1] for row in fixed_score]
    est_gp.fit(features, targets)
    predictions = est_gp.predict([row[:-1] for row in fixed_score])

    print("Getting results")
    results = [[athlete, prediction]
               for athlete, prediction in zip(athletes, predictions)]
    results.sort(key=lambda x: -x[1])

    print("\"Scale\",\"Name\",\"Team\",\"Position\",\"Status\",\"Price\",\"Prediction\"")
    for athlete, prediction in results:
        scale = (athlete.status == Status.Probable
                 and escalation[athlete.position] > 0)
        if scale:
            escalation[athlete.position] = escalation[athlete.position] - 1

        marker = "*" if scale else " "
        print("".join((
            "\"", marker,
            "\",\"", athlete.nick,
            "\",\"", athlete.club.name,
            "\",\"", str(athlete.position.name),
            "\",\"", str(athlete.status.name),
            "\",", str(athlete.price),
            ",", str(prediction),
        )))
    print("Done")