def test_validate_fitness():
    """Check that valid fitness measures are accepted & invalid raise error"""
    # This should be fine
    _ = make_fitness(function=_mean_square_error, greater_is_better=True)
    # non-bool greater_is_better
    assert_raises(ValueError, make_fitness, _mean_square_error, 'Sure')
    assert_raises(ValueError, make_fitness, _mean_square_error, 1)

    # Check arg count tests
    def bad_fun1(x1, x2):
        return 1.0
    assert_raises(ValueError, make_fitness, bad_fun1, True)

    # Check return type tests
    def bad_fun2(x1, x2, w):
        return 'ni'
    assert_raises(ValueError, make_fitness, bad_fun2, True)

    def _custom_metric(y, y_pred, w):
        """Calculate the root mean square error."""
        return np.sqrt(np.average(((y_pred - y) ** 2), weights=w))

    custom_metric = make_fitness(function=_custom_metric,
                                 greater_is_better=True)
    for Symbolic in (SymbolicRegressor, SymbolicTransformer):
        # These should be fine
        est = Symbolic(generations=2, random_state=0, metric=custom_metric)
        est.fit(boston.data, boston.target)
def test_parallel_custom_metric():
    """Regression test for running parallel training with custom transformer"""
    def _custom_metric(y, y_pred, w):
        """Calculate the root mean square error."""
        return np.sqrt(np.average(((y_pred - y) ** 2), weights=w))

    custom_metric = make_fitness(function=_custom_metric,
                                 greater_is_better=True)
    est = SymbolicRegressor(generations=2,
                            metric=custom_metric,
                            random_state=0,
                            n_jobs=2)
    est.fit(boston.data, boston.target)
    _ = pickle.dumps(est)

    # Unwrapped functions should fail
    custom_metric = make_fitness(function=_custom_metric,
                                 greater_is_better=True,
                                 wrap=False)
    est = SymbolicRegressor(generations=2,
                            metric=custom_metric,
                            random_state=0,
                            n_jobs=2)
    est.fit(boston.data, boston.target)
    assert_raises(AttributeError, pickle.dumps, est)

    # Single threaded will also fail in non-interactive sessions
    est = SymbolicRegressor(generations=2,
                            metric=custom_metric,
                            random_state=0)
    est.fit(boston.data, boston.target)
    assert_raises(AttributeError, pickle.dumps, est)
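# Why `wrap` matters (explanatory note, not part of the original test file):
# make_fitness defaults to wrap=True, which wraps the custom function (via
# joblib's wrap_non_picklable_objects) so the fitted estimator stays
# picklable -- something joblib needs when n_jobs > 1 and for persistence.
# With wrap=False the raw function is stored instead, and pickling the
# estimator fails with AttributeError, as asserted above.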
def test_customized_regressor_metrics():
    """Check whether greater_is_better works for SymbolicRegressor."""
    x_data = check_random_state(0).uniform(-1, 1, 100).reshape(50, 2)
    y_true = x_data[:, 0] ** 2 + x_data[:, 1] ** 2

    est_gp = SymbolicRegressor(metric='mean absolute error',
                               stopping_criteria=0.000001,
                               random_state=415,
                               parsimony_coefficient=0.001,
                               init_method='full',
                               init_depth=(2, 4))
    est_gp.fit(x_data, y_true)
    formula = est_gp.__str__()
    assert_equal('add(mul(X1, X1), mul(X0, X0))', formula, True)

    def neg_mean_absolute_error(y, y_pred, sample_weight):
        return -1 * mean_absolute_error(y, y_pred,
                                        sample_weight=sample_weight)

    customized_fitness = make_fitness(neg_mean_absolute_error,
                                      greater_is_better=True)
    c_est_gp = SymbolicRegressor(metric=customized_fitness,
                                 stopping_criteria=-0.000001,
                                 random_state=415,
                                 parsimony_coefficient=0.001,
                                 verbose=0,
                                 init_method='full',
                                 init_depth=(2, 4))
    c_est_gp.fit(x_data, y_true)
    c_formula = c_est_gp.__str__()
    assert_equal('add(mul(X1, X1), mul(X0, X0))', c_formula, True)
def make_explict_fitness(func, metric, greater_is_better, use_raw_y=False):
    """Build a gplearn fitness object whose target is derived on the fly.

    :param func: function
        Used to compute the reward given the program's output (or the raw
        labels when ``use_raw_y`` is True).
    :param metric: function
        Measures the fitness, with signature ``metric(y, y_pred, w)``.
    :param greater_is_better: bool
        Whether a greater fitness value means better performance.
    :param use_raw_y: bool
        Apply ``func`` to the raw labels instead of the predictions.
    :return: a fitness object created by ``make_fitness``
    """
    def _fitness(y, y_pred, sample_weight):
        """
        :param y: [0] * len(x)
            Effectively a placeholder, since we won't know the reward
            before we get y_pred. In practice we pass [0] * len(x).
        :param y_pred: the predictions generated by the algorithm
        :param sample_weight: sample weight for each label
        :return: the fitness value
        """
        if use_raw_y:
            y = func(y)
        else:
            y = func(y_pred)
        return metric(y, y_pred, sample_weight)

    return make_fitness(_fitness, greater_is_better)
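# A minimal usage sketch of the factory above. `toy_reward` and
# `weighted_mse` are hypothetical stand-ins (not from the original source)
# showing how a reward function and a metric plug together.
import numpy as np

def toy_reward(y_pred):
    # Hypothetical reward: derive the target from the program's own output.
    return y_pred ** 2

def weighted_mse(y, y_pred, sample_weight):
    # Plain weighted mean squared error.
    return np.average((y - y_pred) ** 2, weights=sample_weight)

toy_fitness = make_explict_fitness(func=toy_reward, metric=weighted_mse,
                                   greater_is_better=False)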
def fit(self, x_data):
    est_gp = SymbolicRegressor(population_size=500,
                               generations=10,
                               stopping_criteria=0.0001,
                               p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1,
                               metric=make_fitness(self.make_explict_func(),
                                                   False),
                               function_set=self.function_set,
                               verbose=1,
                               parsimony_coefficient=0.01)
    # Fit against row indices so the custom fitness can look up rewards.
    indices = np.arange(x_data.shape[0])
    est_gp.fit(x_data, indices)
    return est_gp
def test_custom_transformer_metrics():
    """Check whether greater_is_better works for SymbolicTransformer."""
    est_gp = SymbolicTransformer(generations=2,
                                 population_size=100,
                                 hall_of_fame=10,
                                 n_components=1,
                                 metric='pearson',
                                 random_state=415)
    est_gp.fit(boston.data, boston.target)
    expected_formula = ('sub(div(mul(X4, X12), div(X9, X9)), '
                        'sub(div(X11, X12), add(X12, X0)))')
    for program in est_gp:
        formula = program.__str__()
        assert_equal(expected_formula, formula, True)

    def _neg_weighted_pearson(y, y_pred, w):
        """Calculate the weighted Pearson correlation coefficient."""
        with np.errstate(divide='ignore', invalid='ignore'):
            y_pred_demean = y_pred - np.average(y_pred, weights=w)
            y_demean = y - np.average(y, weights=w)
            corr = ((np.sum(w * y_pred_demean * y_demean) / np.sum(w)) /
                    np.sqrt((np.sum(w * y_pred_demean**2) *
                             np.sum(w * y_demean**2)) /
                            (np.sum(w)**2)))
        if np.isfinite(corr):
            return -1 * np.abs(corr)
        return 0.

    neg_weighted_pearson = make_fitness(function=_neg_weighted_pearson,
                                        greater_is_better=False)
    c_est_gp = SymbolicTransformer(generations=2,
                                   population_size=100,
                                   hall_of_fame=10,
                                   n_components=1,
                                   stopping_criteria=-1,
                                   metric=neg_weighted_pearson,
                                   random_state=415)
    c_est_gp.fit(boston.data, boston.target)
    for program in c_est_gp:
        c_formula = program.__str__()
        assert_equal(expected_formula, c_formula, True)
def test_custom_classifier_metrics():
    """Check whether greater_is_better works for SymbolicClassifier."""
    x_data = check_random_state(0).uniform(-1, 1, 100).reshape(50, 2)
    y_true = x_data[:, 0] ** 2 + x_data[:, 1] ** 2
    y_true = (y_true < y_true.mean()).astype(int)

    est_gp = SymbolicClassifier(metric='log loss',
                                stopping_criteria=0.000001,
                                random_state=415,
                                parsimony_coefficient=0.01,
                                init_method='full',
                                init_depth=(2, 4))
    est_gp.fit(x_data, y_true)
    formula = est_gp.__str__()
    expected_formula = 'sub(0.364, mul(add(X0, X0), add(X0, X0)))'
    assert_equal(expected_formula, formula, True)

    def negative_log_loss(y, y_pred, w):
        """Calculate the negative log loss."""
        eps = 1e-15
        y_pred = np.clip(y_pred, eps, 1 - eps)
        score = y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred)
        return np.average(score, weights=w)

    customized_fitness = make_fitness(negative_log_loss,
                                      greater_is_better=True)
    c_est_gp = SymbolicClassifier(metric=customized_fitness,
                                  stopping_criteria=0.000001,
                                  random_state=415,
                                  parsimony_coefficient=0.01,
                                  init_method='full',
                                  init_depth=(2, 4))
    c_est_gp.fit(x_data, y_true)
    c_formula = c_est_gp.__str__()
    assert_equal(expected_formula, c_formula, True)
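# Note (explanatory, not from the original test file): the built-in
# 'log loss' metric is minimised (greater_is_better=False), while the custom
# negative_log_loss above returns the negated loss and is maximised. Both
# therefore rank programs identically, which is exactly what the matching
# expected_formula asserts.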
# definate_variable=[
#     # [-4, [3]],
#     [-3, [2]],
#     [-2, [1]],
#     [-1, [0]]],
# variable_linkage=None)
# result = mainPart(X, y, pset, pop_n=500, random_seed=6, cxpb=0.5, mutpb=0.5,
#                   ngen=10, tournsize=3, max_value=10, max_=3, double=False,
#                   score=[r2_score, custom_loss_func], iner_add=True,
#                   target_dim=None, cal_dim=False, store=False)

def _mape(y, y_pred, w):
    """Return the weighted R^2 score.

    Note: despite the name, this computes r2_score rather than the mean
    absolute percentage error, which is consistent with
    greater_is_better=True below.
    """
    return r2_score(y, y_pred, sample_weight=w)

mape = make_fitness(_mape, greater_is_better=True)

# X = normalize(X)
# sr = SymbolicRegressor(population_size=1000, generations=50,
#                        tournament_size=100, stopping_criteria=0.1,
#                        const_range=(-1.0, 1.0), init_depth=(4, 6),
#                        init_method='half and half',
#                        function_set=('add', 'sub', 'mul', 'div', 'log'),
#                        metric=mape, parsimony_coefficient=0.001,
#                        p_crossover=0.9, p_subtree_mutation=0.01,
#                        p_hoist_mutation=0.01, p_point_mutation=0.01,
#                        p_point_replace=0.05, max_samples=1.0,
#                        feature_names=None, warm_start=False,
#                        low_memory=False, n_jobs=1, verbose=0,
#                        random_state=7)
sr = SymbolicTransformer(population_size=1000, hall_of_fame=100,
                         n_components=10, generations=20,
# Define exp
def _exp(x):
    y = np.exp(x)
    # protect against infinities
    y[np.isinf(y)] = 10**6
    return y

exp = functions.make_function(_exp, 'exp', 1)
function_set = ['add', 'sub', 'mul', 'div', 'log', 'sin', 'cos', exp]

# Create summed absolute error as metric
_sae = lambda y, t, w: np.sum(np.abs(y - t))
sae = fitness.make_fitness(_sae, False)

n_generations = 50

# Initialize genetic programming regressor
est_gp = genetic.SymbolicRegressor(population_size=1000,
                                   generations=1,
                                   stopping_criteria=0.01,
                                   p_crossover=0.7,
                                   p_subtree_mutation=0,
                                   p_hoist_mutation=0,
                                   p_point_mutation=0,
                                   max_samples=0.9,
                                   verbose=1,
                                   parsimony_coefficient=0,
                                   random_state=0,
                                   metric=sae,
scale = make_function(function=_scale, name='scale', arity=1)

user_function = [exp, square, ts_mid, wma, skew, kurt, norm, normMaxMin,
                 corr, cov, delta_pct, reg_alpha, reg_beta, reg_resi,
                 delta, delay, rank, scale, sma, stddev, product,
                 ts_rank, ts_min, ts_max, ts_argmax, ts_argmin, ts_sum]

#%% Define the objective (fitness) function
def _my_metric(y, y_pred, w):
    value = np.sum(y + y_pred)
    return value

my_metric = make_fitness(function=_my_metric, greater_is_better=True)

#%% Generate expressions
generations = 3           # number of generations to evolve
population_size = 1000    # number of formulas in each generation
tournament_size = 20      # formulas sampled per generation for the fitness tournament
const_range = (0.0, 10.0)
function_set = init_function + user_function  # function operators
metric = my_metric        # objective function used as the fitness measure
random_state = 316        # random seed

est_gp = SymbolicTransformer(feature_names=fields,
                             function_set=function_set,
                             generations=generations,
                             metric=metric,
                             population_size=population_size,
    # annual_std = annualized_factor[data_frequency] * np.nanstd(daily_ret)
    std = np.nanstd(daily_ret)  # not annualized
    if std == 0:
        sp = 0
    else:
        sp = totret / std
    return sp

def _accuracy_score(y, y_pred, w=None):
    y_digi = np.digitize(y, [-0.05, 0.05]) - 1
    y_pred_digi = np.digitize(y_pred, [-0.05, 0.05]) - 1
    return accuracy_score(y_digi, y_pred_digi)

gp_sharpe = make_fitness(_sharpe, greater_is_better=True)
gp_accuracy_score = make_fitness(_accuracy_score, greater_is_better=True)

def clean_gplearn_programs(gplearn_programs, verbose=0):
    all_programs_info_list = []
    if verbose > 0:
        iterobj = tqdm(enumerate(gplearn_programs))
    else:
        iterobj = enumerate(gplearn_programs)
    for gen_i, gen in iterobj:
        for prog_i, prog in enumerate(gen):
            if prog is not None:
                _fitness = prog.fitness_
                _depth = prog.depth_
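# Illustrative note (not from the original source): the digitize calls in
# _accuracy_score bucket returns into three classes around the +/-5%
# thresholds, e.g.
#
#     >>> np.digitize([-0.10, 0.00, 0.10], [-0.05, 0.05]) - 1
#     array([-1,  0,  1])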
import numpy as np
import matplotlib.pyplot as plt
from gplearn.functions import make_function
from gplearn.fitness import make_fitness
from gplearn.genetic import SymbolicRegressor
import graphviz

def exp_func(x):
    with np.errstate(over='ignore'):
        return np.where(np.abs(x) < 100, np.exp(x), 0.)

exp = make_function(function=exp_func, name='expo', arity=1)

def _fitness(y, y_pred, sample_weight):
    return np.sum(np.abs(y - y_pred))

fit = make_fitness(function=_fitness, greater_is_better=False, wrap=False)

def get_data():
    x = np.linspace(-1, 1, 21).reshape(-1, 1)
    y = np.array([0, -0.1629, -0.2624, -0.3129, -0.3264, -0.3125, -0.2784,
                  -0.2289, -0.1664, -0.0909, 0.0, 0.1111, 0.2496, 0.4251,
                  0.6496, 0.9375, 1.3056, 1.7731, 2.3616, 3.0951, 4.0000])
    return x, y

pop_size = 1000
function_set = ['add', 'sub', 'mul', 'log', exp, 'sin', 'cos', 'div']
num_generations = 50
crossover_prob = 0.7
mutation_prob = 0.1

def experiment(seed, i):
    est_gp = SymbolicRegressor(population_size=pop_size,
                               generations=num_generations,
                               stopping_criteria=0.01,
                               p_crossover=crossover_prob,
                               p_subtree_mutation=mutation_prob,
def explicit_fitness(y, y_pred, sample_weight):
    n_data = len(y)
    y = [int(_) for _ in y]
    indices = (ctypes.c_int * n_data)(*y)
    arr = (ctypes.c_double * n_data)(*y_pred)
    res = get_reward_func(indices, arr)
    # print(res)
    return res

# metric_gp = DynamicSymbolicRegressor.make_explict_fitness(get_reward_func,
#                                                           y_as_fitness,
#                                                           False)
# x_data = x_data.reshape(10, 1)
est_gp = SymbolicRegressor(population_size=50,
                           generations=20,
                           stopping_criteria=0.01,
                           p_crossover=0.7,
                           p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05,
                           p_point_mutation=0.1,
                           metric=make_fitness(explicit_fitness, False),
                           max_samples=0.9,
                           verbose=1,
                           parsimony_coefficient=0.01,
                           random_state=0)

# Fit against row indices; explicit_fitness maps them back to rewards.
_ = [i for i in range(x_data.shape[0])]
est_gp.fit(x_data, _)

from PIL import Image
graph = pydotplus.graphviz.graph_from_dot_data(
    est_gp._program.export_graphviz())
graph.write_png("tree.png")
# print([method for method in dir(graph) if callable(getattr(graph, method))])
# Image.open(graph.create_png())
def train():
    mid = Middleware(CONFIG_DLL_PATH)
    get_reward_func = mid.get_function(CONFIG_FUNC_KEY_REWARD)
    get_reward_func.argtypes = [ctypes.POINTER(ctypes.c_int),
                                ctypes.POINTER(ctypes.c_double),
                                ctypes.c_int,
                                ctypes.POINTER(ctypes.c_double),
                                ctypes.c_int,
                                ctypes.c_int]
    get_reward_func.restype = ctypes.c_double

    cheating_func = mid.get_function(CONFIG_FUNC_KEY_CHEAT)
    cheating_func.restype = ctypes.POINTER(ctypes.c_double)

    x_data = read_data(CONFIG_FILE_PATH)
    _x_data = x_data.flatten()
    x_arr_pointer = _x_data.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    x_len = len(_x_data)
    n_data = int(x_len / CONFIG_N_DIM)

    def explicit_fitness(y, _y_pred, sample_weight):
        _indices = np.array([i for i in range(len(y)) if sample_weight[i]],
                            dtype=int)
        _y_pred_arr = np.array([_y_pred[i] for i in range(len(y))
                                if sample_weight[i]], dtype=float)
        _n_data = len(_indices)
        indices_pointer = _indices.ctypes.data_as(
            ctypes.POINTER(ctypes.c_int))
        y_pred_arr_pointer = _y_pred_arr.ctypes.data_as(
            ctypes.POINTER(ctypes.c_double))
        result = get_reward_func(indices_pointer, y_pred_arr_pointer,
                                 _n_data, x_arr_pointer, CONFIG_N_DIM, x_len)
        return result

    explicit_fitness.counter = 0
    explicit_fitness.res = 0

    function_set = ['add', 'sub', 'mul', 'div', 'sin']
    est_gp = SymbolicRegressor(population_size=5000,
                               generations=10,
                               stopping_criteria=0.01,
                               p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1,
                               metric=make_fitness(explicit_fitness, False),
                               function_set=function_set,
                               max_samples=0.8,
                               verbose=1,
                               parsimony_coefficient=0.01,
                               random_state=0)
    _ = [i for i in range(x_data.shape[0])]
    est_gp.fit(x_data, _)

    ts = int(time.time())
    graph = pydotplus.graphviz.graph_from_dot_data(
        est_gp._program.export_graphviz())
    graph.write_png("outputs/gp-{suffix}.png".format(suffix=ts))

    res = cheating_func(x_arr_pointer, CONFIG_N_DIM, x_len)
    y_truth = np.array([float(res[i]) for i in range(n_data)])
    y_pred = np.array(est_gp.predict(x_data))

    n_data_plot = 200
    indices_plot = sorted(np.random.choice(n_data, n_data_plot,
                                           replace=False))
    canvas = gp_plot.GPCanvas()
    canvas.draw_line_chart_2d(range(0, n_data_plot), y_truth[indices_plot],
                              color="blue", label="y_truth",
                              line_style="solid")
    canvas.draw_line_chart_2d(range(0, n_data_plot), y_pred[indices_plot],
                              color="red", label="y_pred")

    mse = ((np.array(y_truth) - np.array(y_pred)) ** 2).mean()
    canvas.set_x_label("Indices")
    canvas.set_y_label("Values")
    canvas.set_title("Fitting plot with MSE={:5f}".format(mse))
    canvas.set_legend()
    canvas.set_axis_invisible()
    canvas.froze()
import numpy as np
import pandas as pd

from gplearn.fitness import make_fitness

def _my_metric(y, y_pred, w):
    value = np.sum(np.abs(y) + np.abs(y_pred))
    return value

def _msle(y, y_pred, w):
    value = np.square((np.log1p(y) - np.log1p(y_pred))).mean()
    return value

def _mse(y, y_pred, w):
    value = np.square(np.subtract(y, y_pred)).mean()
    return value

my_metric = make_fitness(function=_my_metric, greater_is_better=True)
MSLE = make_fitness(function=_msle, greater_is_better=False)
MSE = make_fitness(function=_mse, greater_is_better=False)
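# Minimal usage sketch (assumption: X_train / y_train are placeholders, not
# from the original source) showing one of the metrics above plugged into an
# estimator:
#
#     from gplearn.genetic import SymbolicRegressor
#     est = SymbolicRegressor(generations=5, metric=MSE, random_state=0)
#     est.fit(X_train, y_train)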
labels, test_size=0.2)

def _accuracy(y, y_pred, w):
    """Calculate the accuracy."""
    # Threshold the raw program output into binary class labels.
    y_pred = np.where(y_pred < 0, 0, 1)
    diffs = np.abs(y - y_pred)  # calculate how many different values
    return 1 - (np.sum(diffs) / len(y_pred))

accuracy = make_fitness(_accuracy, greater_is_better=True)

est_gp = SymbolicClassifier(
    population_size=1000,
    generations=200,
    stopping_criteria=0.01,
    p_crossover=0.7,
    p_subtree_mutation=0.1,
    p_hoist_mutation=0.05,
    p_point_mutation=0.1,
    max_samples=0.9,
    verbose=1,
    feature_names=('V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9',
                   'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17',
                   'V18', 'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25',
                   'V26', 'V27', 'V28', 'V29', 'V30', 'V31', 'V32', 'V33',
                   'V34',
    return np.where(np.abs(x1) < 100, np.exp(x1), 0.)

exp = functions.make_function(function=_protected_exponent, name='exp',
                              arity=1)

# https://gplearn.readthedocs.io/en/stable/advanced.html#custom-fitness
# Custom fitness function: negative sum of absolute errors
def _nsae(true_y, pred_y, w):
    diffs = np.abs(true_y - pred_y)
    return -sum(diffs)

nsae = fitness.make_fitness(_nsae, greater_is_better=True)

# Run symbolic regression
def symbolicRegr(funcs):
    gpRun = genetic.SymbolicRegressor(population_size=popSize,
                                      generations=noGens,
                                      tournament_size=20,
                                      const_range=None,
                                      function_set=funcs,
                                      metric=nsae,
                                      p_crossover=crossoverProb,
                                      p_subtree_mutation=mutationProb,
                                      p_hoist_mutation=mutationProb,
                                      p_point_mutation=mutationProb,
                                      verbose=0)