Example #1
def test_validate_fitness():
    """Check that valid fitness measures are accepted & invalid raise error"""

    # A valid function and bool flag should be accepted
    _ = make_fitness(function=_mean_square_error, greater_is_better=True)
    # non-bool greater_is_better
    assert_raises(ValueError, make_fitness, _mean_square_error, 'Sure')
    assert_raises(ValueError, make_fitness, _mean_square_error, 1)

    # Functions with the wrong argument count should be rejected
    def bad_fun1(x1, x2):
        return 1.0
    assert_raises(ValueError, make_fitness, bad_fun1, True)

    # Functions with a non-numeric return type should be rejected
    def bad_fun2(x1, x2, w):
        return 'ni'
    assert_raises(ValueError, make_fitness, bad_fun2, True)

    def _custom_metric(y, y_pred, w):
        """Calculate the root mean square error."""
        return np.sqrt(np.average(((y_pred - y) ** 2), weights=w))

    custom_metric = make_fitness(function=_custom_metric,
                                 greater_is_better=True)

    for Symbolic in (SymbolicRegressor, SymbolicTransformer):
        # These should be fine
        est = Symbolic(generations=2, random_state=0, metric=custom_metric)
        est.fit(boston.data, boston.target)
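
The checks above pin down the contract a custom metric must satisfy: exactly three arguments (y, y_pred, w), a numeric return value, and a bool greater_is_better. A minimal template satisfying that contract (the weighted squared-error body is an illustrative assumption):

import numpy as np
from gplearn.fitness import make_fitness

def _my_fitness(y, y_pred, w):
    # any numeric score computed from (y, y_pred, w) is acceptable
    return float(np.average((y - y_pred) ** 2, weights=w))

my_fitness = make_fitness(function=_my_fitness, greater_is_better=False)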
Example #2
def test_parallel_custom_metric():
    """Regression test for running parallel training with custom transformer"""

    def _custom_metric(y, y_pred, w):
        """Calculate the root mean square error."""
        return np.sqrt(np.average(((y_pred - y) ** 2), weights=w))

    custom_metric = make_fitness(function=_custom_metric,
                                 greater_is_better=True)
    est = SymbolicRegressor(generations=2,
                            metric=custom_metric,
                            random_state=0,
                            n_jobs=2)
    est.fit(boston.data, boston.target)
    _ = pickle.dumps(est)

    # Unwrapped functions should fail
    custom_metric = make_fitness(function=_custom_metric,
                                 greater_is_better=True,
                                 wrap=False)
    est = SymbolicRegressor(generations=2,
                            metric=custom_metric,
                            random_state=0,
                            n_jobs=2)
    est.fit(boston.data, boston.target)
    assert_raises(AttributeError, pickle.dumps, est)

    # Single threaded will also fail in non-interactive sessions
    est = SymbolicRegressor(generations=2,
                            metric=custom_metric,
                            random_state=0)
    est.fit(boston.data, boston.target)
    assert_raises(AttributeError, pickle.dumps, est)
Example #3
def test_customized_regressor_metrics():
    """Check whether greater_is_better works for SymbolicRegressor."""

    x_data = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_true = x_data[:, 0] ** 2 + x_data[:, 1] ** 2

    est_gp = SymbolicRegressor(metric='mean absolute error',
                               stopping_criteria=0.000001, random_state=415,
                               parsimony_coefficient=0.001, init_method='full',
                               init_depth=(2, 4))
    est_gp.fit(x_data, y_true)
    formula = str(est_gp)
    assert_equal('add(mul(X1, X1), mul(X0, X0))', formula, True)

    def neg_mean_absolute_error(y, y_pred, sample_weight):
        """Return the negated mean absolute error."""
        return -1 * mean_absolute_error(y, y_pred, sample_weight=sample_weight)

    customized_fitness = make_fitness(neg_mean_absolute_error,
                                      greater_is_better=True)

    c_est_gp = SymbolicRegressor(metric=customized_fitness,
                                 stopping_criteria=-0.000001, random_state=415,
                                 parsimony_coefficient=0.001, verbose=0,
                                 init_method='full', init_depth=(2, 4))
    c_est_gp.fit(x_data, y_true)
    c_formula = str(c_est_gp)
    assert_equal('add(mul(X1, X1), mul(X0, X0))', c_formula, True)
Example #4
    def make_explict_fitness(func, metric, greater_is_better, use_raw_y=False):
        """

        :param func: function
            the function this is used to get the reward given output
        :param metric: function
            the function measures the fitness
        :param greater_is_better: bool
            whether it is true that the greater the fitness is the better the performance is
        :return:
        """

        def _fitness(y, y_pred, sample_weight):
            """
               :param y: [0] * len(x)
                   This should be None since we wont know the reward before we get the y_pred.
                   In practice we use [0] * len(x)
               :param y_pred:
                   The y_pred generated by the algorithm
               :param sample_weight:
                   sample_weight for each label
               :return:
            """
            if use_raw_y:
                y = func(y)
            else:
                y = func(y_pred)
            return metric(y, y_pred, sample_weight)

        return make_fitness(_fitness, greater_is_better)
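
A minimal usage sketch for the factory above; the reward function is a hypothetical stand-in, and the call mirrors the class-level usage commented out in Example #15 (DynamicSymbolicRegressor.make_explict_fitness):

import numpy as np

def reward_of(y_pred):
    # hypothetical reward: how close each prediction is to 1.0
    return [1.0 - abs(v - 1.0) for v in y_pred]

def _mse(y, y_pred, w):
    return float(np.average((np.asarray(y) - np.asarray(y_pred)) ** 2, weights=w))

metric = make_explict_fitness(func=reward_of, metric=_mse,
                              greater_is_better=False)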
Example #5
def test_validate_fitness():
    """Check that custom fitness functions are accepted"""
    def _custom_metric(y, y_pred, w):
        """Calculate the root mean square error."""
        return np.sqrt(np.average(((y_pred - y)**2), weights=w))

    custom_metric = make_fitness(function=_custom_metric,
                                 greater_is_better=True)

    for Symbolic in (SymbolicRegressor, SymbolicTransformer):
        # These should be fine
        est = Symbolic(generations=2, random_state=0, metric=custom_metric)
        est.fit(boston.data, boston.target)
Example #6
    def fit(self, x_data):
        est_gp = SymbolicRegressor(population_size=500,
                                   generations=10,
                                   stopping_criteria=0.0001,
                                   p_crossover=0.7,
                                   p_subtree_mutation=0.1,
                                   p_hoist_mutation=0.05,
                                   p_point_mutation=0.1,
                                   metric=make_fitness(
                                       self.make_explict_func(), False),
                                   function_set=self.function_set,
                                   verbose=1,
                                   parsimony_coefficient=0.01)

        indices = np.arange(x_data.shape[0])
        est_gp.fit(x_data, indices)

        return est_gp
Example #7
def test_validate_fitness():
    """Check that valid fitness measures are accepted & invalid raise error"""

    # A valid function and bool flag should be accepted
    fun = make_fitness(function=_mean_square_error, greater_is_better=True)
    # non-bool greater_is_better
    assert_raises(ValueError, make_fitness, _mean_square_error, 'Sure')
    assert_raises(ValueError, make_fitness, _mean_square_error, 1)

    # Functions with the wrong argument count should be rejected
    def bad_fun1(x1, x2):
        return 1.0
    assert_raises(ValueError, make_fitness, bad_fun1, True)

    # Functions with a non-numeric return type should be rejected
    def bad_fun2(x1, x2, w):
        return 'ni'
    assert_raises(ValueError, make_fitness, bad_fun2, True)
Example #8
def test_custom_transformer_metrics():
    """Check whether greater_is_better works for SymbolicTransformer."""

    est_gp = SymbolicTransformer(generations=2,
                                 population_size=100,
                                 hall_of_fame=10,
                                 n_components=1,
                                 metric='pearson',
                                 random_state=415)
    est_gp.fit(boston.data, boston.target)
    for program in est_gp:
        formula = str(program)
    expected_formula = ('sub(div(mul(X4, X12), div(X9, X9)), '
                        'sub(div(X11, X12), add(X12, X0)))')
    assert_equal(expected_formula, formula, True)

    def _neg_weighted_pearson(y, y_pred, w):
        """Calculate the weighted Pearson correlation coefficient."""
        with np.errstate(divide='ignore', invalid='ignore'):
            y_pred_demean = y_pred - np.average(y_pred, weights=w)
            y_demean = y - np.average(y, weights=w)
            corr = (
                (np.sum(w * y_pred_demean * y_demean) / np.sum(w)) / np.sqrt(
                    (np.sum(w * y_pred_demean**2) * np.sum(w * y_demean**2)) /
                    (np.sum(w)**2)))
        if np.isfinite(corr):
            return -1 * np.abs(corr)
        return 0.

    neg_weighted_pearson = make_fitness(function=_neg_weighted_pearson,
                                        greater_is_better=False)

    c_est_gp = SymbolicTransformer(generations=2,
                                   population_size=100,
                                   hall_of_fame=10,
                                   n_components=1,
                                   stopping_criteria=-1,
                                   metric=neg_weighted_pearson,
                                   random_state=415)
    c_est_gp.fit(boston.data, boston.target)
    for program in c_est_gp:
        c_formula = str(program)
    assert_equal(expected_formula, c_formula, True)
Example #9
def test_custom_classifier_metrics():
    """Check whether greater_is_better works for SymbolicClassifier."""

    x_data = check_random_state(0).uniform(-1, 1, 100).reshape(50, 2)
    y_true = x_data[:, 0] ** 2 + x_data[:, 1] ** 2
    y_true = (y_true < y_true.mean()).astype(int)

    est_gp = SymbolicClassifier(metric='log loss',
                                stopping_criteria=0.000001,
                                random_state=415,
                                parsimony_coefficient=0.01,
                                init_method='full',
                                init_depth=(2, 4))
    est_gp.fit(x_data, y_true)
    formula = str(est_gp)
    expected_formula = 'sub(0.364, mul(add(X0, X0), add(X0, X0)))'
    assert_equal(expected_formula, formula, True)

    def negative_log_loss(y, y_pred, w):
        """Calculate the log loss."""
        eps = 1e-15
        y_pred = np.clip(y_pred, eps, 1 - eps)
        score = y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred)
        return np.average(score, weights=w)

    customized_fitness = make_fitness(negative_log_loss,
                                      greater_is_better=True)

    c_est_gp = SymbolicClassifier(metric=customized_fitness,
                                  stopping_criteria=0.000001,
                                  random_state=415,
                                  parsimony_coefficient=0.01,
                                  init_method='full',
                                  init_depth=(2, 4))
    c_est_gp.fit(x_data, y_true)
    c_formula = str(c_est_gp)
    assert_equal(expected_formula, c_formula, True)
Example #10
#                     definate_variable=[
#                                        # [-4, [3]],
#                                        [-3, [2]],
#                                        [-2, [1]],
#                                        [-1, [0]]],
#                     variable_linkage=None)
# result = mainPart(X, y, pset, pop_n=500, random_seed=6, cxpb=0.5, mutpb=0.5, ngen=10, tournsize=3, max_value=10,max_=3,
#                   double=False, score=[r2_score,custom_loss_func], iner_add=True, target_dim=None,cal_dim=False,store=False)


def _mape(y, y_pred, w):
    """Calculate the mean absolute percentage error."""
    return np.average(np.abs((y - y_pred) / y), weights=w)


mape = make_fitness(_mape, greater_is_better=False)
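
A quick standalone sanity check of the metric above (toy values):

import numpy as np
y = np.array([1.0, 2.0, 4.0])
y_pred = np.array([1.1, 1.8, 4.4])
w = np.ones_like(y)
print(_mape(y, y_pred, w))  # each error is 10%, so the MAPE is 0.1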

# X = normalize(X)

# sr = SymbolicRegressor(population_size=1000, generations=50, tournament_size=100, stopping_criteria=0.1,
#                        const_range=(-1.0, 1.0), init_depth=(4, 6), init_method='half and half',
#                        function_set=('add', 'sub', 'mul', 'div',"log"), metric=mape,
#                        parsimony_coefficient=0.001, p_crossover=0.9, p_subtree_mutation=0.01,
#                        p_hoist_mutation=0.01, p_point_mutation=0.01, p_point_replace=0.05,
#                        max_samples=1.0, feature_names=None, warm_start=False, low_memory=False,
#                        n_jobs=1, verbose=0, random_state=7)

sr = SymbolicTransformer(population_size=1000,
                         hall_of_fame=100,
                         n_components=10,
                         generations=20,
Example #11
# Define a protected exponential
def _exp(x):
    y = np.exp(x)
    # protect against infinities
    y[np.isinf(y)] = 10**6
    return y


exp = functions.make_function(_exp, 'exp', 1)

function_set = ['add', 'sub', 'mul', 'div', 'log', 'sin', 'cos', exp]

# Create summed absolute error (SAE) as the metric; the sample weights w are ignored here
_sae = lambda y, t, w: np.sum(np.abs(y - t))
sae = fitness.make_fitness(_sae, False)

n_generations = 50
# Initialize the genetic programming regressor
est_gp = genetic.SymbolicRegressor(population_size=1000,
                                   generations=1,
                                   stopping_criteria=0.01,
                                   p_crossover=0.7,
                                   p_subtree_mutation=0,
                                   p_hoist_mutation=0,
                                   p_point_mutation=0,
                                   max_samples=0.9,
                                   verbose=1,
                                   parsimony_coefficient=0,
                                   random_state=0,
                                   metric=sae,
Example #12
scale = make_function(function=_scale, name='scale', arity=1)

user_function = [exp, square, ts_mid, wma, skew, kurt, norm, normMaxMin,
                 corr, cov, delta_pct, reg_alpha, reg_beta, reg_resi,
                 delta, delay, rank, scale, sma, stddev, product,
                 ts_rank, ts_min, ts_max, ts_argmax, ts_argmin, ts_sum]

#%% Define the objective (fitness) function


def _my_metric(y, y_pred, w):
    value = np.sum(y + y_pred)
    return value


my_metric = make_fitness(function=_my_metric, greater_is_better=True)

#%% Generate expressions

generations = 3  # number of generations to evolve
population_size = 1000  # number of formulas in each generation
tournament_size = 20  # number of formulas randomly drawn per tournament to compute fitness
const_range = (0.0, 10.0)
function_set = init_function + user_function  # function operators
metric = my_metric  # the custom objective used as the fitness measure
random_state = 316  # random seed
est_gp = SymbolicTransformer(feature_names=fields,
                             function_set=function_set,
                             generations=generations,
                             metric=metric,
                             population_size=population_size,
Example #13
        # annual_std = annualized_factor[data_frequency] * np.nanstd(daily_ret)
        std = np.nanstd(daily_ret)  # not annualized
        if std == 0:
            sp = 0
        else:
            sp = totret / std
    return sp


def _accuracy_score(y, y_pred, w=None):
    """Digitize y and y_pred into three classes at -0.05/+0.05 and score the accuracy."""
    y_digi = np.digitize(y, [-0.05, 0.05]) - 1
    y_pred_digi = np.digitize(y_pred, [-0.05, 0.05]) - 1
    return accuracy_score(y_digi, y_pred_digi)


gp_sharpe = make_fitness(_sharpe, greater_is_better=True)
gp_accuracy_score = make_fitness(_accuracy_score, greater_is_better=True)
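
A sketch of wiring one of these fitness objects into an estimator; the parameter values are placeholders:

from gplearn.genetic import SymbolicRegressor
est = SymbolicRegressor(generations=5, metric=gp_sharpe, random_state=0)
# est.fit(X, y) would then evolve programs that maximize the Sharpe-style fitness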


def clean_gplearn_programs(gplearn_programs, verbose=0):
    all_programs_info_list = []
    if verbose > 0:
        iterobj = tqdm(enumerate(gplearn_programs))
    else:
        iterobj = enumerate(gplearn_programs)

    for gen_i, gen in iterobj:
        for prog_i, prog in enumerate(gen):
            if prog is not None:
                _fitness = prog.fitness_
                _depth = prog.depth_
Example #14
import numpy as np
import matplotlib.pyplot as plt
from gplearn.functions import make_function
from gplearn.fitness import make_fitness
from gplearn.genetic import SymbolicRegressor
import graphviz

def exp_func(x):
    with np.errstate(over='ignore'):
        return np.where(np.abs(x) < 100, np.exp(x), 0.)
exp = make_function(function=exp_func, name='expo', arity=1)

def _fitness(y, y_pred, sample_weight):
    return np.sum(np.abs(y-y_pred))
fit = make_fitness(function=_fitness, greater_is_better=False, wrap=False)
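
Note: wrap=False skips the wrapping that keeps a custom metric picklable, so (as Example #2 above shows) pickle.dumps would raise AttributeError on the fitted estimator. A sketch of the alternative, assuming the model needs to be serialized:

# keep the default wrap=True so the fitted regressor stays picklable
fit_picklable = make_fitness(function=_fitness, greater_is_better=False)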

def get_data():
    x = np.linspace(-1, 1, 21).reshape(-1,1)
    y = np.array([0, -0.1629, -0.2624, -0.3129, -0.3264, -0.3125, -0.2784, -0.2289, -0.1664, -0.0909, 0.0, 0.1111, 0.2496, 0.4251, 0.6496, 0.9375, 1.3056, 1.7731, 2.3616, 3.0951, 4.0000] )
    return x, y

pop_size = 1000
function_set = ['add', 'sub', 'mul', 'log', exp, 'sin', 'cos', 'div']
num_generations = 50
crossover_prob = 0.7
mutation_prob = 0.1

def experiment(seed, i):
    est_gp = SymbolicRegressor(population_size = pop_size,
                               generations=num_generations, stopping_criteria=0.01,
                               p_crossover=crossover_prob, p_subtree_mutation=mutation_prob,
Example #15
#
def explicit_fitness(y, y_pred, sample_weight):
    n_data = len(y)
    y = [int(_) for _ in y]
    indices = (ctypes.c_int * n_data)(*y)
    arr = (ctypes.c_double * n_data)(*y_pred)
    res = get_reward_func(indices, arr)
    # print(res)
    return res


# metric_gp = DynamicSymbolicRegressor.make_explict_fitness(get_reward_func, y_as_fitness, False)


# x_data = x_data.reshape(10, 1)
est_gp = SymbolicRegressor(population_size=50,
                           generations=20, stopping_criteria=0.01,
                           p_crossover=0.7, p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05, p_point_mutation=0.1,
                           metric=make_fitness(explicit_fitness, False),
                           max_samples=0.9, verbose=1,
                           parsimony_coefficient=0.01, random_state=0)
_ = [i for i in range(x_data.shape[0])]
est_gp.fit(x_data, _)
from PIL import Image

graph = pydotplus.graphviz.graph_from_dot_data(est_gp._program.export_graphviz())
graph.write_png("tree.png")
# print([method for method in dir(graph) if callable(getattr(graph, method))])
# Image.open(graph.create_png())
Example #16
def train():

    mid = Middleware(CONFIG_DLL_PATH)

    get_reward_func = mid.get_function(CONFIG_FUNC_KEY_REWARD)
    get_reward_func.argtypes = [ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_double), ctypes.c_int,
                                ctypes.POINTER(ctypes.c_double), ctypes.c_int, ctypes.c_int]
    get_reward_func.restype = ctypes.c_double

    cheating_func = mid.get_function(CONFIG_FUNC_KEY_CHEAT)
    cheating_func.restype = ctypes.POINTER(ctypes.c_double)

    x_data = read_data(CONFIG_FILE_PATH)
    _x_data = x_data.flatten()
    x_arr_pointer = _x_data.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    x_len = len(_x_data)

    n_data = int(x_len / CONFIG_N_DIM)


    def explicit_fitness(y, _y_pred, sample_weight):
        # keep only in-bag samples: gplearn zeroes the sample weight of
        # out-of-bag rows when max_samples < 1
        _indices = np.array([i for i in range(len(y)) if sample_weight[i]], dtype=int)
        _y_pred_arr = np.array([_y_pred[i] for i in range(len(y)) if sample_weight[i]], dtype=float)

        _n_data = len(_indices)

        indices_pointer = _indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
        y_pred_arr_pointer = _y_pred_arr.ctypes.data_as(ctypes.POINTER(ctypes.c_double))

        result = get_reward_func(indices_pointer, y_pred_arr_pointer, _n_data, x_arr_pointer, CONFIG_N_DIM, x_len)

        return result


    explicit_fitness.counter = 0
    explicit_fitness.res = 0

    function_set = ['add', 'sub', 'mul', 'div', 'sin']
    est_gp = SymbolicRegressor(population_size=5000,
                               generations=10, stopping_criteria=0.01,
                               p_crossover=0.7, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               metric=make_fitness(explicit_fitness, False),
                               function_set=function_set,
                               max_samples=0.8, verbose=1,
                               parsimony_coefficient=0.01, random_state=0)

    _ = [i for i in range(x_data.shape[0])]
    est_gp.fit(x_data, _)

    ts = int(time.time())

    graph = pydotplus.graphviz.graph_from_dot_data(est_gp._program.export_graphviz())
    graph.write_png("outputs/gp-{suffix}.png".format(suffix=ts))

    res = cheating_func(x_arr_pointer, CONFIG_N_DIM, x_len)
    y_truth = np.array([float(res[i]) for i in range(n_data)])
    y_pred = np.array(est_gp.predict(x_data))

    n_data_plot = 200
    indices_plot = sorted(np.random.choice(n_data, n_data_plot, replace=False))

    canvas = gp_plot.GPCanvas()
    canvas.draw_line_chart_2d(range(0, n_data_plot), y_truth[indices_plot], color="blue", label="y_truth",
                              line_style="solid")

    canvas.draw_line_chart_2d(range(0, n_data_plot), y_pred[indices_plot], color="red", label="y_pred")

    mse = ((np.array(y_truth) - np.array(y_pred)) ** 2).mean()

    canvas.set_x_label("Indices")
    canvas.set_y_label("Values")
    canvas.set_title("Fitting plot with MSE={:5f}".format(mse))
    canvas.set_legend()
    canvas.set_axis_invisible()

    canvas.froze()
Example #17
import numpy as np
import pandas as pd
from gplearn.fitness import make_fitness


def _my_metric(y, y_pred, w):
    value = np.sum(np.abs(y) + np.abs(y_pred))

    return value


def _msle(y, y_pred, w):
    value = np.square((np.log1p(y) - np.log1p(y_pred))).mean()

    return value


def _mse(y, y_pred, w):
    value = np.square(np.subtract(y, y_pred)).mean()

    return value


my_metric = make_fitness(function=_my_metric, greater_is_better=True)
MSLE = make_fitness(function=_msle, greater_is_better=False)
MSE = make_fitness(function=_mse, greater_is_better=False)
Example #18
x_train, x_test, y_train, y_test = train_test_split(features,
                                                    labels,
                                                    test_size=0.2)


def _accuracy(y, y_pred, w):
    """Calculate the classification accuracy."""
    y_pred = np.where(y_pred < 0, 0, 1)  # threshold raw outputs into class labels
    diffs = np.abs(y - y_pred)  # count mismatched labels
    return 1 - (np.sum(diffs) / len(y_pred))


accuracy = make_fitness(_accuracy, greater_is_better=True)
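
A quick standalone check of the fixed metric above (toy values):

import numpy as np
y = np.array([1, 0, 1, 1])
raw = np.array([0.3, -0.2, -0.1, 0.7])  # thresholds to [1, 0, 0, 1]
print(_accuracy(y, raw, None))  # 3 of 4 labels match -> 0.75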

est_gp = SymbolicClassifier(
    population_size=1000,
    generations=200,
    stopping_criteria=0.01,
    p_crossover=0.7,
    p_subtree_mutation=0.1,
    p_hoist_mutation=0.05,
    p_point_mutation=0.1,
    max_samples=0.9,
    verbose=1,
    feature_names=('V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
                   'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18',
                   'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26',
                   'V27', 'V28', 'V29', 'V30', 'V31', 'V32', 'V33', 'V34',
Example #19
def _protected_exponent(x1):
    with np.errstate(over='ignore'):
        return np.where(np.abs(x1) < 100, np.exp(x1), 0.)


exp = functions.make_function(function=_protected_exponent,
                              name='exp',
                              arity=1)


# https://gplearn.readthedocs.io/en/stable/advanced.html#custom-fitness
# Custom fitness function for - sum of absolute errors
def _nsae(true_y, pred_y, w):
    """Negated sum of absolute errors, so that greater is better."""
    diffs = np.abs(true_y - pred_y)
    return -np.sum(diffs)


nsae = fitness.make_fitness(_nsae, greater_is_better=True)
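
A one-line sanity check of the metric above (toy values):

import numpy as np
print(_nsae(np.array([1.0, 2.0]), np.array([1.5, 2.5]), None))  # -(0.5 + 0.5) = -1.0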


# Run symbolic regression
def symbolicRegr(funcs):
    gpRun = genetic.SymbolicRegressor(population_size=popSize,
                                      generations=noGens,
                                      tournament_size=20,
                                      const_range=None,
                                      function_set=funcs,
                                      metric=nsae,
                                      p_crossover=crossoverProb,
                                      p_subtree_mutation=mutationProb,
                                      p_hoist_mutation=mutationProb,
                                      p_point_mutation=mutationProb,
                                      verbose=0)