Example #1
0
def test_parallel_custom_transformer():
    """Regression test: parallel fitting with a custom transformer.

    A wrapped custom transformer must leave the fitted classifier picklable.
    With ``wrap=False`` pickling is expected to raise ``AttributeError``,
    both when fitted with ``n_jobs=2`` and with the default job count.
    """
    def _logistic(x1):
        with np.errstate(over='ignore', under='ignore'):
            return 1 / (1 + np.exp(-x1))

    def _fitted(transformer, **extra):
        # Build and fit a small classifier around the given transformer.
        model = SymbolicClassifier(generations=2,
                                   transformer=transformer,
                                   random_state=0,
                                   **extra)
        model.fit(cancer.data, cancer.target)
        return model

    # Wrapped function: pickling must succeed after a parallel fit.
    wrapped = make_function(function=_logistic, name='sig', arity=1)
    pickle.dumps(_fitted(wrapped, n_jobs=2))

    # Unwrapped functions should fail, in parallel ...
    unwrapped = make_function(function=_logistic, name='sig', arity=1,
                              wrap=False)
    assert_raises(AttributeError, pickle.dumps, _fitted(unwrapped, n_jobs=2))

    # ... and single threaded too (in non-interactive sessions).
    assert_raises(AttributeError, pickle.dumps, _fitted(unwrapped))
Example #2
0
def test_parallel_custom_function():
    """Regression test: parallel fitting with a custom function.

    A wrapped custom function in the function set must leave the fitted
    regressor picklable. With ``wrap=False`` pickling is expected to raise
    ``AttributeError``, with ``n_jobs=2`` and with the default job count.
    """
    def _choose(x1, x2, x3, x4):
        return np.where(x1 > x2, x3, x4)

    def _fitted(custom, **extra):
        # Build and fit a small regressor whose function set includes `custom`.
        model = SymbolicRegressor(
            generations=2,
            function_set=['add', 'sub', 'mul', 'div', custom],
            random_state=0,
            **extra)
        model.fit(boston.data, boston.target)
        return model

    # Wrapped function: pickling must succeed after a parallel fit.
    wrapped = make_function(function=_choose, name='logical', arity=4)
    pickle.dumps(_fitted(wrapped, n_jobs=2))

    # Unwrapped functions should fail, in parallel ...
    unwrapped = make_function(function=_choose,
                              name='logical',
                              arity=4,
                              wrap=False)
    assert_raises(AttributeError, pickle.dumps, _fitted(unwrapped, n_jobs=2))

    # ... and single threaded too (in non-interactive sessions).
    assert_raises(AttributeError, pickle.dumps, _fitted(unwrapped))
Example #3
0
    def fit(self, X, y=None, state=None):
        """Fit a ``SymbolicTransformer`` on ``X``/``y`` and store the result.

        The transformer uses the built-in function names listed below plus a
        custom ``exp`` node built from ``exponent``. Its size and metric come
        from ``self.generations``, ``self.population``, ``self.hall_of_fame``,
        ``self.components`` and ``self.metric``.

        Parameters
        ----------
        X, y
            Training data, passed straight to ``SymbolicTransformer.fit``.
        state
            Not used by this method. The default is ``None`` rather than a
            mutable ``{}`` shared across calls.

        Returns
        -------
        self
            The value returned by ``gp.fit`` is stored under
            ``self.state['genetic']['fit']``; ``self.state`` must already
            exist and support item assignment.
        """
        exponential = make_function(function=exponent, name='exp', arity=1)

        function_set = ['add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv', 'max',
                        'min', 'tan', 'sin', 'cos', exponential]

        # NOTE(review): random_state and n_jobs are hard-coded here.
        gp = SymbolicTransformer(generations=self.generations, population_size=self.population,
                                 hall_of_fame=self.hall_of_fame, n_components=self.components,
                                 function_set=function_set,
                                 parsimony_coefficient='auto',
                                 max_samples=0.6, verbose=1, metric=self.metric,
                                 random_state=0, n_jobs=7)

        # Replace any previous 'genetic' entry with the new fit result.
        self.state['genetic'] = {'fit': gp.fit(X, y)}

        return self
Example #4
0
def test_validate_function():
    """Valid definitions pass ``make_function``; invalid ones raise ValueError."""

    # A well-formed definition is accepted.
    make_function(function=_protected_sqrt, name='sqrt', arity=1)

    # Non-integer arity is rejected.
    for bad_arity in ('1', 1.0):
        assert_raises(ValueError, make_function, _protected_sqrt, 'sqrt',
                      bad_arity)
    # Non-bool wrap is rejected.
    assert_raises(ValueError, make_function, _protected_sqrt, 'sqrt', 1, 'f')
    # Arity that does not match the callable is rejected.
    assert_raises(ValueError, make_function, _protected_sqrt, 'sqrt', 2)
    assert_raises(ValueError, make_function, maximum, 'max', 1)

    # Name check.
    assert_raises(ValueError, make_function, _protected_sqrt, 2, 1)

    # Return type check.
    def _returns_text(x1, x2):
        return 'ni'

    assert_raises(ValueError, make_function, _returns_text, 'ni', 2)

    # Return shape check.
    def _returns_column(x1):
        return np.ones((2, 1))

    assert_raises(ValueError, make_function, _returns_column, 'ni', 1)

    # Closure check for negative inputs.
    def _raw_sqrt(x1):
        with np.errstate(divide='ignore', invalid='ignore'):
            return np.sqrt(x1)

    assert_raises(ValueError, make_function, _raw_sqrt, 'sqrt', 1)

    # Closure check for zero inputs.
    def _raw_div(x1, x2):
        with np.errstate(divide='ignore', invalid='ignore'):
            return np.divide(x1, x2)

    assert_raises(ValueError, make_function, _raw_div, 'div', 2)
def test_function_in_program():
    """A custom function can be used inside evolved programs.

    Fits a ``SymbolicTransformer`` whose function set includes a 4-ary
    ``logical`` node and compares one specific program's string form with
    the expected formula.
    """
    def _select(x1, x2, x3, x4):
        return np.where(x1 > x2, x3, x4)

    custom = make_function(function=_select, name='logical', arity=4)
    est = SymbolicTransformer(
        generations=2,
        population_size=2000,
        hall_of_fame=100,
        n_components=10,
        function_set=['add', 'sub', 'mul', 'div', custom],
        parsimony_coefficient=0.0005,
        max_samples=0.9,
        random_state=0)
    est.fit(boston.data[:300, :], boston.target[:300])

    # Program 906 of generation 0 is the one pinned by this test.
    program = est._programs[0][906]
    formula = str(program)
    expected_formula = 'sub(logical(X6, add(X11, 0.898), X10, X2), X5)'
    assert_equal(expected_formula, formula, True)
Example #6
0
def test_custom_functions():
    """The custom-function example produces the expected program and graph.

    Shuffles the Boston data with a fixed seed, fits a
    ``SymbolicTransformer`` with a custom 4-ary ``logical`` node, then checks
    one program's string form and its Graphviz export.
    """

    rng = check_random_state(0)
    boston = load_boston()
    order = rng.permutation(boston.target.size)
    boston.data, boston.target = boston.data[order], boston.target[order]

    def _select(x1, x2, x3, x4):
        return np.where(x1 > x2, x3, x4)

    custom = make_function(function=_select, name='logical', arity=4)

    gp = SymbolicTransformer(
        generations=2,
        population_size=2000,
        hall_of_fame=100,
        n_components=10,
        function_set=['add', 'sub', 'mul', 'div', custom],
        parsimony_coefficient=0.0005,
        max_samples=0.9,
        random_state=0)

    gp.fit(boston.data[:300, :], boston.target[:300])

    # Program 906 of generation 0 is the one pinned by this test.
    program = gp._programs[0][906]
    assert_equal(str(program),
                 'sub(logical(X6, add(X11, 0.898), X10, X2), X5)')

    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="logical", '
                'fillcolor="#136ed4"] ;\n2 [label="X6", fillcolor="#60a6f6"] '
                ';\n3 [label="add", fillcolor="#136ed4"] ;\n4 [label="X11", '
                'fillcolor="#60a6f6"] ;\n5 [label="0.898", '
                'fillcolor="#60a6f6"] ;\n3 -> 5 ;\n3 -> 4 ;\n6 [label="X10", '
                'fillcolor="#60a6f6"] ;\n7 [label="X2", fillcolor="#60a6f6"] '
                ';\n1 -> 7 ;\n1 -> 6 ;\n1 -> 3 ;\n1 -> 2 ;\n8 [label="X5", '
                'fillcolor="#60a6f6"] ;\n0 -> 8 ;\n0 -> 1 ;\n}')
    assert_equal(program.export_graphviz(), expected)
Example #7
0
def projection_generator_function(max_arity, projection='np.mean'):
    """Generate gplearn functions applying ``projection`` row-wise.

    For every arity ``n`` in ``range(3, max_arity)`` a function source taking
    ``n`` arguments (named ``x3``, ``x4``, ...) is built as text, compiled,
    and wrapped with ``make_function`` under the name
    ``'<projection>_<n>'``. The generated function stacks its arguments and
    calls ``projection`` over ``axis=1`` of the transposed stack.

    Parameters
    ----------
    max_arity : int
        Exclusive upper bound on the generated arities.
    projection : str
        Source text of the reducing callable, e.g. ``'np.mean'``.

    Returns
    -------
    list
        One ``make_function`` result per arity; empty when ``max_arity <= 3``.
    """
    first_arity = 3
    generated = []
    for n_args in range(first_arity, max_arity):
        # Comma-separated parameter names, shared by signature and body.
        params = ','.join(
            'x%d' % idx for idx in range(first_arity, first_arity + n_args))
        source = ("def experiment_file(" + params + "):\n\treturn "
                  + '%s(np.vstack([' % projection
                  + params + "]).T,axis = 1)")
        module_code = compile(source, "<string>", "exec")
        # The first constant of the module code is the function's code object.
        func = FunctionType(module_code.co_consts[0], globals(), "base_code")
        generated.append(
            make_function(func,
                          '%s_%d' % (projection, n_args),
                          arity=n_args))
    return generated
Example #8
0
   #return a or b
#    for i in range(tam):
#        x[i]=a[i]|b[i]
    
#    return x
def logi_or(a, b):
    """Return the element-wise OR of ``a`` and ``b`` after casting to bool."""
    lhs = a.astype(bool)
    rhs = b.astype(bool)
    return lhs | rhs
    
def logi_not(a):
    """Return the element-wise negation of ``a`` after casting to bool."""
    flags = a.astype(bool)
    return ~flags
        
def logi_xor(a, b):
    """Return the element-wise logical XOR of ``a`` and ``b``.

    Both inputs are cast to bool first, as the sibling OR/NOT operators do.
    Comparing the raw values with ``!=`` would treat two different non-zero
    values (both truthy) as "exclusive", which is not XOR.
    """
    return a.astype(bool) ^ b.astype(bool)

# Wrap the boolean helpers as gplearn function nodes. The `name` is the label
# used for the node; `arity` is the number of arguments it takes.
logic_and = make_function(function=logi_and, name='op_and',arity=2)
logic_or = make_function(function=logi_or, name='op_or',arity=2)
logic_xor = make_function(function=logi_xor, name='op_xor',arity=2)
logic_not = make_function(function=logi_not, name='op_not',arity=1)

# Earlier variant of the function set, without XOR:
#function_set = [logic_and, logic_not,logic_or]
# Function set actually used: only the four custom logic operators.
function_set = [logic_and,logic_or,logic_xor,logic_not]
est_gp = SymbolicRegressor(population_size=100,
                           generations=500,
                           #stopping_criteria=0.01,
                           tournament_size=2,
                           function_set= function_set,
                           parsimony_coefficient=0.009,
                           max_samples=1.0,
                           verbose=1,
                           p_crossover=0.9, p_subtree_mutation=0.1,
Example #9
0
    )

    #Scale the X_train,y_train and X_test
    mmx = MinMaxScaler()
    X_train = mmx.fit_transform(X_train)
    X_test = mmx.fit_transform(X_test)

    mmy = MinMaxScaler()
    y_train = mmy.fit_transform(y_train)

    #Save the scalers
    dump(mmx,"./mmx.bin")
    dump(mmy,"./mmy.bin")    
    
    #Make custom function
    power = make_function(function=power, name="power", arity=2)
    power_2 = make_function(function=power_2, name="power_2", arity=1)
    power_3 = make_function(function=power_3, name="power_3", arity=1)
    power_4 = make_function(function=power_4, name="power_4", arity=1)
    
    #Form the function set
    function_set = [
        "add", "sub", "mul", "div", "inv", "sqrt",                  #Default-function
        power_2                                                     #Custom-function
    ]

    #Converter: from function to string (*args to sympify)
    converter = {
        'add': lambda x, y : x + y,
        'sub': lambda x, y : x - y,
        'mul': lambda x, y : x*y,
    value = gb.apply(lambda x: x / x.sum())

    return np.nan_to_num(value.values)


def _sec_demean(df1):  # cross-sectional de-meaning
    """Subtract the per-time-group mean from ``df1``.

    ``df1`` is placed in a frame next to the module-level ``trade_date`` and
    ``stock_code`` columns and grouped by ``'time'``; each value then has its
    group's mean subtracted. NaNs in the result are replaced via
    ``np.nan_to_num``.
    """
    frame = pd.DataFrame({'0': df1})
    frame['time'] = trade_date
    frame['code'] = stock_code
    by_time = frame.groupby('time')['0']
    demeaned = by_time.apply(lambda col: col - col.mean())

    return np.nan_to_num(demeaned.values)


# Register the custom operators as gplearn function nodes.
# Unary element-wise operators.
exp = make_function(function=_exp, name='exp', arity=1)
square = make_function(function=_square, name='square', arity=1)
# Two-argument `ts_*` operators (the underlying helpers are defined
# elsewhere in this file).
ts_max = make_function(function=_ts_max, name='ts_max', arity=2)
ts_min = make_function(function=_ts_min, name='ts_min', arity=2)
ts_mid = make_function(function=_ts_mid, name='ts_mid', arity=2)
ts_mean = make_function(function=_ts_mean, name='ts_mean', arity=2)
ts_wma = make_function(function=_ts_wma, name='ts_wma', arity=2)
ts_std = make_function(function=_ts_std, name='ts_std', arity=2)
ts_skew = make_function(function=_ts_skew, name='ts_skew', arity=2)
ts_kurt = make_function(function=_ts_kurt, name='ts_kurt', arity=2)
# NOTE(review): this node is named 'tsnorm' (no underscore), unlike its
# siblings' 'ts_*' names; confirm whether that is intentional.
ts_norm = make_function(function=_ts_norm, name='tsnorm', arity=2)
ts_normMaxMin = make_function(function=_ts_normMaxMin,
                              name='ts_normMaxMin',
                              arity=2)
ts_rank = make_function(function=_ts_rank, name='ts_rank', arity=2)
ts_argmax = make_function(function=_ts_argmax, name='ts_argmax', arity=2)
Example #11
0
if __name__ == '__main__':
    f = "/home/philgun/Documents/coolstuff/coolstuff/ML/script/script/RBF/data/data.mat"
    data = Data()
    df = data.generate_data(f)

    df = df[
        ["der_hf_2","T_f_2","T_f_1","T_f_3","T_s_2","u_flow"]
    ]

    print(df)

    X = df[df.columns[1:]].to_numpy()
    y =  df[df.columns[0]].to_numpy().reshape(-1,1)

    power = make_function(function=power, name="power", arity=2)
    power_2 = make_function(function=power_2, name="power_2", arity=1)

    function_set = [
        "add", "sub", "mul", "div", "inv", "sqrt","log",
        power_2
    ]

    converter = {
        'add': lambda x, y : x + y,
        'sub': lambda x, y : x - y,
        'mul': lambda x, y : x*y,
        'div': lambda x, y : x/y,
        'neg': lambda x    : -x,
        'inv': lambda x: 1/x,
        'sqrt': lambda x: x**0.5,
Example #12
0
popSize = 1000
noGens = 50
crossoverProb = 0.7
mutationProb = 0.0


# https://gplearn.readthedocs.io/en/stable/advanced.html#custom-functions
# Custom safe exponent function
def _protected_exponent(x1):
    with np.errstate(over='ignore'):
        return np.where(np.abs(x1) < 100, np.exp(x1), 0.)


# Register the protected exponential as a unary gplearn function node
# named 'exp'.
exp = functions.make_function(function=_protected_exponent,
                              name='exp',
                              arity=1)


# https://gplearn.readthedocs.io/en/stable/advanced.html#custom-fitness
# Custom fitness function for - sum of absolute errors
def _nsae(true_y, pred_y, w):
    diffs = np.abs(true_y - pred_y)
    return -sum(diffs)


# Wrap the negated sum of absolute errors as a gplearn fitness metric.
# Larger (closer to zero) is better. The function is passed by keyword
# because recent gplearn releases declare these arguments keyword-only.
nsae = fitness.make_fitness(function=_nsae, greater_is_better=True)


# Run symbolic regression
def symbolicRegr(funcs):
Example #13
0
    0, -.1629, -.2624, -.3129, -.3264, -.3125, -.2784, -.2289, -.1664, -.0909,
    0, .1111, .2496, .4251, .6496, .9375, 1.3056, 1.7731, 2.3616, 3.0951, 4.
])
x = x.reshape(-1, 1)
y = y.reshape(-1, 1)


#Define exp
def _exp(x):
    y = np.exp(x)
    #protext for infinites
    y[np.isinf(y)] = 10**6
    return y


# Register the protected exponential as a unary gplearn node. Arguments are
# passed by keyword because recent gplearn releases declare them keyword-only.
exp = functions.make_function(function=_exp, name='exp', arity=1)

function_set = ['add', 'sub', 'mul', 'div', 'log', 'sin', 'cos', exp]


# Summed absolute error, used as the fitness metric (smaller is better).
def _sae(y, t, w):
    """Return the sum of absolute differences between ``y`` and ``t``.

    ``w`` (sample weights) is accepted for the fitness signature but unused.
    """
    return np.sum(np.abs(y - t))


sae = fitness.make_fitness(function=_sae, greater_is_better=False)

n_generations = 50
#Initialize genetic programm regressor
est_gp = genetic.SymbolicRegressor(population_size=1000,
                                   generations=1,
                                   stopping_criteria=0.01,
                                   p_crossover=0.7,
                                   p_subtree_mutation=0,
                                   p_hoist_mutation=0,
    compare_htm.append([fidelity_train, fidelity_test])

----------------------------------------
#
# Genetic Programming with gplearn
#

# to add an exponential function to the function set,
# I needed to create one from "scratch", and to prevent overflow
# errors from the gplearn method a "protected" exponential function
# is needed
def _protected_exponent(x):
    with np.errstate(over='ignore'):
       return np.where(np.abs(x) < 100, np.exp(x), 0.)

# Wrap the protected exponential as a unary gplearn node labelled 'e_func'.
e_function = make_function(function = _protected_exponent, name = 'e_func', arity = 1)
# The function set must be specified explicitly; otherwise gplearn only
# uses its default set ['add', 'sub', 'mul', 'div'].
f_set = ['add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv', 'max', 'min', 'sin', 'cos', e_function]

# Settings and accumulator for the per-sequence loop that follows.
time_step = 3
fidelity = []
for j in rand_indx:

    seqn = data[j]
    train_size = int(np.ceil(len(seqn)*0.70))
    x = np.reshape(np.array([i for i in range(len(seqn))]), (len(seqn), 1))

    # population_size = initial number of random programs to examine
    # generations = max number of times programs will continue on to earn a chance at becoming fit
    # tournament_size = number of programs that will compete
Example #15
0
    z = np.tanh(x)

    z = np.where(np.abs(z) < 1e+1000, z, np.sign(z)*1e+1000)
    z[~np.isfinite(z)]=0
    
    return z

def pexp(x):
    """Return ``exp(x)`` capped at ``exp(100)``.

    Entries whose exponential is not ``<= exp(100)`` (large inputs, and NaN
    inputs because the comparison is false) map to ``exp(100)``.

    The exponential is evaluated once, with overflow warnings suppressed.
    The previous extra clamps compared against ``1e+1000``, which is ``inf``
    as a Python float; since the capped values are always finite, those
    steps could never change the result and are dropped.
    """
    cap = np.exp(100)
    with np.errstate(over='ignore'):
        raw = np.exp(x)
    return np.where(raw <= cap, raw, cap)

# Register the protected operators as gplearn function nodes. Arguments are
# passed by keyword because recent gplearn releases declare them keyword-only.
myExp = make_function(function=pexp, name="exp", arity=1)
#myTanh = make_function(np.tanh, "tanh", 1)
mySqrt = make_function(function=lambda x: np.sqrt(np.abs(x)),
                       name="sqrtabs", arity=1)
myLog = make_function(function=plog, name="log", arity=1)
myDiv = make_function(function=pdiv, name="pdiv", arity=2)
# NOTE(review): the variable is called myTanh but the node is labelled "tan";
# confirm which operation `ptan` implements and whether the label is intended.
myTanh = make_function(function=ptan, name="tan", arity=1)
        
    

def RMSE(yhat, y):
    """Return the root of the mean squared difference of ``yhat`` and ``y``."""
    residual = yhat - y
    mean_squared = np.square(residual).mean()
    return np.sqrt(mean_squared)


# gridsearch_configurations is a dictionary, where each key is a parameter
# and its value can be one of two options:
# - list (python native):
Example #16
0
def _rank(data):
    value = np.array(pd.Series(data.flatten()).rank().tolist())
    value = np.nan_to_num(value)
    return value


def _scale(data):
    k = 1
    data = pd.Series(data.flatten())
    value = data.mul(k).div(np.abs(data).sum())
    value = np.nan_to_num(value)
    return value


# Register the custom operators as gplearn function nodes.
# Unary element-wise operators.
exp = make_function(function=_exp, name='exp', arity=1)
square = make_function(function=_square, name='square', arity=1)
# Two-argument operators (the underlying helpers are defined elsewhere in
# this file).
ts_max = make_function(function=_ts_max, name='ts_max', arity=2)
ts_min = make_function(function=_ts_min, name='ts_min', arity=2)
ts_mid = make_function(function=_ts_mid, name='ts_mid', arity=2)
sma = make_function(function=_sma, name='sma', arity=2)
wma = make_function(function=_wma, name='wma', arity=2)
stddev = make_function(function=_stddev, name='stddev', arity=2)
skew = make_function(function=_skew, name='skew', arity=2)
kurt = make_function(function=_kurt, name='kurt', arity=2)
norm = make_function(function=_norm, name='norm', arity=2)
normMaxMin = make_function(function=_normMaxMin, name='norm_MaxMin', arity=2)
ts_rank = make_function(function=_ts_rank, name='ts_rank', arity=2)
ts_argmax = make_function(function=_ts_argmax, name='ts_argmax', arity=2)
ts_argmin = make_function(function=_ts_argmin, name='ts_argmin', arity=2)
# Three-argument operator.
corr = make_function(function=_corr, name='corr', arity=3)
import numpy as np
import matplotlib.pyplot as plt
from gplearn.functions import make_function
from gplearn.fitness import make_fitness
from gplearn.genetic import SymbolicRegressor
import graphviz

def exp_func(x):
    """Return ``exp(x)`` where ``|x| < 100`` and ``0.`` elsewhere.

    Overflow warnings from evaluating ``np.exp`` on out-of-range entries are
    suppressed.
    """
    with np.errstate(over='ignore'):
        within_bounds = np.abs(x) < 100
        return np.where(within_bounds, np.exp(x), 0.)
# Register the protected exponential as a unary gplearn node labelled 'expo'.
exp = make_function(function=exp_func, name='expo', arity=1)

def _fitness(y, y_pred, sample_weight):
    return np.sum(np.abs(y-y_pred))
# Wrap the summed absolute error as a gplearn fitness metric; smaller values
# are better, and wrapping of the callable is disabled (wrap=False).
fit = make_fitness(function=_fitness, greater_is_better=False, wrap=False)

def get_data():
    """Return the fixed 21-point dataset as ``(x, y)``.

    ``x`` is 21 evenly spaced points on [-1, 1] shaped ``(21, 1)``; ``y`` is
    the matching 1-D array of target values.
    """
    targets = [
        0, -0.1629, -0.2624, -0.3129, -0.3264, -0.3125, -0.2784,
        -0.2289, -0.1664, -0.0909, 0.0, 0.1111, 0.2496, 0.4251,
        0.6496, 0.9375, 1.3056, 1.7731, 2.3616, 3.0951, 4.0000,
    ]
    grid = np.linspace(-1, 1, 21)
    return grid.reshape(-1, 1), np.array(targets)

pop_size = 1000
function_set = ['add', 'sub', 'mul', 'log', exp, 'sin', 'cos', 'div']
num_generations = 50
crossover_prob = 0.7
mutation_prob = 0.1

def experiment(seed, i):
    est_gp = SymbolicRegressor(population_size = pop_size,
                               generations=num_generations, stopping_criteria=0.01,
                               p_crossover=crossover_prob, p_subtree_mutation=mutation_prob,
Example #18
0
#    return x
def logi_or(a, b):
    """Return the element-wise OR of ``a`` and ``b`` after casting to bool."""
    left = a.astype(bool)
    right = b.astype(bool)
    return left | right


def logi_not(a):
    """Return the element-wise negation of ``a`` after casting to bool."""
    truthy = a.astype(bool)
    return ~truthy


def logi_xor(a, b):
    """Return the element-wise logical XOR of ``a`` and ``b``.

    Both inputs are cast to bool first, as the sibling OR/NOT operators do.
    Comparing the raw values with ``!=`` would treat two different non-zero
    values (both truthy) as "exclusive", which is not XOR.
    """
    return a.astype(bool) ^ b.astype(bool)


# Wrap the boolean helpers as gplearn function nodes; `name` is the label
# used for the node and `arity` the number of arguments it takes.
logic_and = make_function(function=logi_and, name='AND', arity=2)
logic_or = make_function(function=logi_or, name='OR', arity=2)
logic_xor = make_function(function=logi_xor, name='XOR', arity=2)
logic_not = make_function(function=logi_not, name='NOT', arity=1)

#function_set = [logic_and, logic_not,logic_or]
# Function set actually used. Note that logic_xor is registered above but
# deliberately or accidentally left out here.
function_set = [logic_and, logic_or, logic_not]
est_gp = SymbolicRegressor(
    population_size=100,
    generations=150,
    #stopping_criteria=0.01,
    tournament_size=2,
    function_set=function_set,
    parsimony_coefficient=0.009,
    max_samples=1.0,
    verbose=1,
def GeneticPrograming():
    """Repeatedly train, checkpoint and score a gplearn ``SymbolicRegressor``.

    Runs forever (``while True``). Each iteration:

    1. builds a fresh regressor, then replaces it with the pickled one at
       ``{PICKLE_PATH}\\EQS_gp.pickle`` if that file exists;
    2. adds 10 generations and divides the mutation probabilities and the
       parsimony coefficient by 10;
    3. fits on the standardised training features, saves the model back to
       the pickle, prints the training MAE and writes a submission CSV.

    Reads the module-level ``df_train_sum``, ``df_test_sum``, ``submission``,
    ``PICKLE_PATH`` and ``DATA_PATH``. Never returns.
    """
    # Keyword arguments: recent gplearn releases make these keyword-only.
    gp_tanh = make_function(function=tanh, name="tanh", arity=1)
    # NOTE(review): gp_sinh / gp_cosh are built but not part of the function
    # set below; kept because make_function also validates the callables.
    gp_sinh = make_function(function=sinh, name="sinh", arity=1)
    gp_cosh = make_function(function=cosh, name="cosh", arity=1)

    X_test = df_test_sum.drop('time', axis=1).fillna(0)
    X_tr = df_train_sum.drop('time', axis=1).fillna(0)
    y_tr = df_train_sum['time']

    # The scaling does not depend on the loop state, so do it once. The
    # scaler is fitted on train and test features together.
    alldata = pd.concat([X_tr, X_test])
    scaler = StandardScaler()
    alldata = pd.DataFrame(scaler.fit_transform(alldata),
                           columns=alldata.columns)
    X_tr_scaled = alldata[:X_tr.shape[0]]
    X_test_scaled = alldata[X_tr.shape[0]:]

    model_path = f'{PICKLE_PATH}\\EQS_gp.pickle'

    while True:
        est_gp = SymbolicRegressor(
            population_size=200000,
            tournament_size=5000,
            generations=10,
            stopping_criteria=0.0,
            p_crossover=0.9,
            p_subtree_mutation=0.001,
            p_hoist_mutation=0.001,
            p_point_mutation=0.001,
            max_samples=1.0,
            verbose=1,
            function_set=('add', 'sub', 'mul', 'div', gp_tanh, 'sqrt', 'log',
                          'abs', 'neg', 'inv', 'max', 'min', 'tan', 'cos',
                          'sin'),
            metric='mean absolute error',
            warm_start=True,
            n_jobs=1,
            parsimony_coefficient=0.001,
            random_state=11)

        # Resume from the last checkpoint when there is one. The context
        # manager closes the file handle, which was previously leaked.
        # NOTE: pickle.load executes arbitrary code; only load trusted files.
        if os.path.exists(model_path):
            with open(model_path, 'rb') as pickle_in:
                est_gp = pickle.load(pickle_in)
            print("Model Loaded")

        # Extend training and anneal the mutation / parsimony settings.
        est_gp.generations += 10
        est_gp.p_subtree_mutation /= 10
        est_gp.p_hoist_mutation /= 10
        est_gp.p_point_mutation /= 10
        est_gp.parsimony_coefficient /= 10

        est_gp.fit(X_tr_scaled, y_tr)

        with open(model_path, 'wb') as f:
            pickle.dump(est_gp, f)
            print('Model Saved')

        y_gp = est_gp.predict(X_tr_scaled)
        gpLearn_MAE = mean_absolute_error(y_tr, y_gp)
        print("gpLearn MAE:", gpLearn_MAE)

        submission.time_to_failure = est_gp.predict(X_test_scaled)
        submission.to_csv(f'{DATA_PATH}\\gplearnEQS_submission.csv',
                          index=True)
        print(submission.head())
Example #20
0
domaingrid = np.linspace(xmin, xmax, Ng)

outTrain = np.ravel(sigma(inTrain))


def _protected_exponent(x):
    with np.errstate(over='ignore'):
        return np.where(np.abs(x) < 100, np.exp(x), 0.)


def _protected_negexponent(x):
    with np.errstate(over='ignore'):
        return np.where(np.abs(x) < 100, np.exp(-x), 0.)


# Register the protected exp(x) and exp(-x) as unary gplearn nodes. Arguments
# are passed by keyword because recent gplearn releases make them
# keyword-only.
pexp = gf.make_function(function=_protected_exponent, name='exp', arity=1)
pnexp = gf.make_function(function=_protected_negexponent, name='nexp',
                         arity=1)
# Function set for the regressor: arithmetic, both exponentials, negation.
f_s = ['add', 'sub', 'mul', 'div', pexp, pnexp, 'neg']

est_gp = gl.SymbolicRegressor(init_depth=(2, 4),
                              population_size=3000,
                              tournament_size=20,
                              const_range=(-40, 40),
                              generations=20,
                              stopping_criteria=0.01,
                              p_crossover=0.7,
                              p_subtree_mutation=0.1,
                              warm_start=True,
                              p_hoist_mutation=0.05,
                              p_point_mutation=0.1,
                              max_samples=0.9,
def gplearn_procedure(equation_id,
                      no_samples=1000,
                      input_range=(-1, 1),
                      save_path=None,
                      save=True,
                      load=True,
                      func_set=(
                          'add', 'sub', 'mul', 'div', 'log', 'sqrt', 'cos',
                          'tan', 'sin', 'pow', 'exp'
                      ),
                      verbose=1):
    """
    Use gplearn to attempt to recover the equation form of 'equation_id'.

    Renders a graphviz image to images/gplearn/ and returns the fitted
    program, its score on the full dataset and the fit time.

    Parameters
    ----------
    equation_id : string
        The ID of an equation in the dataset. Must be a valid one.

    no_samples : int
        The number of samples fed to the algorithm (capped at the number of
        rows actually available).

    input_range : tuple(float, float)
        The minimum and maximum values of all input parameters.

    save_path : string path
        Passed to ``create_dataset``: where the dataframe is saved.

    save : boolean
        Passed to ``create_dataset``: save the file to save_path iff True.

    load : boolean
        Passed to ``create_dataset``: if True, look for the file in save_path
        and load it when present.

    func_set : iterable of str
        Names of the functions / operations to consider. The default is an
        immutable tuple so it cannot be shared-and-mutated between calls.
        gplearn built-ins accepted here:
        'add', 'sub', 'mul', 'div', 'log', 'sqrt', 'cos', 'tan', 'sin',
        'abs', 'neg', 'inv', 'max', 'min'.
        Self-defined extras:
        'exp' : exponential, arity=1
        'pow' : power, arity=2
        'pi'  : arity=0
        Any other name triggers a warning and is skipped.

    verbose : int
        Controls how much is printed; 0 is quietest.

    Returns
    -------
    program, float, float
        ``est_gp._program``, ``est_gp.score(X, y)`` and the fit duration in
        seconds. If the dataset cannot be created the traceback is printed
        and ``('', 0, 0)`` is returned.
    """
    try:
        df = create_dataset(equation_id,
                            no_samples=no_samples,
                            input_range=input_range,
                            save_path=save_path,
                            save=save,
                            load=load).dropna()
        X = df.drop('target', axis=1)
        y = df['target']
    except Exception:
        # Best effort: report and skip this equation. `Exception` (not a bare
        # except) so KeyboardInterrupt / SystemExit still propagate.
        traceback.print_exc()
        print(f"Error on equation {equation_id} skipping")
        return '', 0, 0
    no_samples = min(no_samples, len(y))

    default_func_set = ('add', 'sub', 'mul', 'div', 'log', 'sqrt', 'cos',
                        'tan', 'sin', 'abs', 'neg', 'inv', 'max', 'min')
    final_func_set = []
    for func in func_set:
        if func in default_func_set:
            final_func_set.append(func)
        # Custom nodes; keyword arguments because recent gplearn releases
        # declare make_function's parameters keyword-only.
        elif func == "pow":
            final_func_set.append(
                make_function(function=power, name=func, arity=2))
        elif func == "exp":
            final_func_set.append(
                make_function(function=exponent, name=func, arity=1))
        elif func == "pi":
            final_func_set.append(
                make_function(function=pi, name=func, arity=0))
        else:
            warnings.warn(
                f"{func} is an unrecognized function, skipping it")

    est_gp = SymbolicRegressor(population_size=5000,
                               generations=10,
                               stopping_criteria=0.01,
                               p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1,
                               max_samples=0.9,
                               function_set=final_func_set,
                               verbose=verbose,
                               parsimony_coefficient=0.01,
                               random_state=0)

    # Time only the fit itself.
    start = time.time()
    est_gp.fit(X[:no_samples], y[:no_samples])
    end = time.time()

    dot_data = est_gp._program.export_graphviz()
    graph = graphviz.Source(dot_data)
    graph.render(f'images/gplearn/{equation_id}_estimate',
                 format='png',
                 cleanup=True)
    # The score is computed on all rows, not only the first `no_samples`.
    return est_gp._program, est_gp.score(X, y), end - start
import numpy as np
from gplearn.genetic import SymbolicRegressor
from gplearn.functions import make_function
from sklearn.model_selection import train_test_split

warnings.filterwarnings("ignore")

def is_less_than_zero(x):
    """Return an int array: 1 where ``x < 0``, else 0."""
    return (x < 0).astype(int)

def is_greater_than_or_equal_to_zero(x):
    """Return an int array: 1 where ``x >= 0``, else 0."""
    return (x >= 0).astype(int)

# Register the two sign indicators as unary gplearn nodes. All arguments are
# passed by keyword because recent gplearn releases make them keyword-only.
is_lt_zero = make_function(function=is_less_than_zero,
                           name="is_lt_zero", arity=1)
is_gte_zero = make_function(function=is_greater_than_or_equal_to_zero,
                            name="is_gte_zero", arity=1)

# Building blocks offered to the regressor for expressing abs(x).
function_set = [is_lt_zero, is_gte_zero, "mul", "add", "neg"]

# Toy dataset: the integers -10..10 as a single feature column, with
# target |x|.
X = np.arange(-10, 11).reshape(-1, 1)
# NOTE(review): y is kept as an (n, 1) column, so y.tolist() below yields a
# list of one-element lists; confirm the estimator accepts that shape.
y = np.abs(X).reshape(-1, 1)

# Default train/test split; no random_state is set, so it varies per run.
X_train, X_test, y_train, y_test = train_test_split(X.tolist(), y.tolist())

# Symbolic regressor restricted to the custom function set above.
my_abs_gp = SymbolicRegressor(function_set=function_set,
                              init_method="grow",
                              parsimony_coefficient=0.0625,
                              verbose=True)

my_abs_gp.fit(X_train, y_train)
Example #23
0
xmax = max(X_train)

# xmin = 1
# xmax = 3

X_int = np.linspace(xmin, xmax, 200)

N = 20

# X_train = rng.uniform(xmin, xmax, N).reshape(N, 1)
# y_train = np.ravel(X_train)

def protexp(x):
    """Return ``exp(-|x|)``; the exponent is never positive."""
    magnitude = np.abs(x)
    return np.exp(-magnitude)

# Register exp(-|x|) as a unary gplearn node. Arguments are passed by keyword
# because recent gplearn releases declare them keyword-only.
nexp = gf.make_function(function=protexp, name='negabsexp', arity=1)
f_s = ['add', 'sub', 'mul', 'div', 'inv', 'abs', nexp, 'log']

est_gp = gl.SymbolicRegressor(init_depth=(3, 6), population_size=4000,
                              tournament_size=20,
                              generations=30, stopping_criteria=0.01,
                              p_crossover=0.7, p_subtree_mutation=0.1,
                              p_hoist_mutation=0.05, p_point_mutation=0.1,
                              max_samples=0.9, verbose=1,
                              parsimony_coefficient=0.01, random_state=0,
                              function_set=f_s)
est_gp.fit(X_train, y_train)

# Predict on X_traintemp (defined elsewhere) and restore its shape.
y_gp = est_gp.predict(np.c_[X_traintemp.ravel()]).reshape(X_traintemp.shape)

# The fitted program is exposed as `_program`; the estimator has no public
# `program` attribute, so the previous `est_gp.program` raised AttributeError.
print(est_gp._program)
Example #24
0
    'log': (_log, 1),
    'inv': (_inverse, 1),
    'exp': (_exp, 1),
    'sig': (_sigmoid, 1),
    'square': (_square, 1),
    'cube': (_cube, 1),
    'compare': (_compare, 2),
    'scale': (_scale, 1),
    'talib_HT_DCPHASE': (_talib_HT_DCPHASE, 1),
}
# Name -> function-node lookup for the base (non time-series) functions.
base_function_dict = {}
# Entries listed in _base_function_params1 are taken as-is from
# _function_map.
for fn in _base_function_params1:
    base_function_dict[fn] = _function_map[fn]

# Entries in _base_function_params2 map a name to a (callable, arity) pair;
# each is wrapped with make_function and stored as a deep copy.
for fn, (f, a) in _base_function_params2.items():
    base_function_dict[fn] = deepcopy(make_function(function=f, name=fn, arity=a))

# Make time series functions
rolling_periods = {
    'minute': np.array([1, 5, 15, 30, 60]),
    'day': np.array([1, 3, 5, 10, 20]),
}

# annualized_factor = {
#     'minute': np.sqrt(240 * 252),
#     'day': np.sqrt(252)
# }


ts_function_params = {
    # function_name: (function, arity, window_iterator)
Example #25
0
def get_custom_function_list():
    """Return the four arithmetic names plus a custom 4-ary 'logical' node."""
    logical = gp_func.make_function(function=_logical, name='logical',
                                    arity=4)
    return ['add', 'sub', 'mul', 'div', logical]
Example #26
0
def function(x):
    """Evaluate ``3 * x**3.5 + 2``."""
    scaled_power = 3 * x ** 3.5
    return scaled_power + 2

def _pow(x1, x2):
    """Return ``x1 ** x2`` computed as ``exp(x2 * log(x1))``.

    Uses the protected log and exponential helpers defined in this file;
    overflow warnings are suppressed for the whole computation.
    """
    with np.errstate(over='ignore'):
        log_base = _protected_log(x1)
        return _protected_exponent(x2 * log_base)

def _protected_exponent(x):
    with np.errstate(over='ignore'):
        return np.where(x < 100, np.exp(x), 2e20)

def _protected_log(x):
    with np.errstate(over='ignore'):
        return np.where(x > 1e-5, np.log(x), -100.0)

# Register the protected helpers as gplearn function nodes.
# NOTE(review): `pow` shadows the Python builtin of the same name at module
# level; renaming would change the module's public names, so it is left.
exp = make_function(function=_protected_exponent, name='exp', arity=1)
log = make_function(function=_protected_log,      name='log', arity=1)
pow = make_function(function=_pow,                name='pow', arity=2)

################################### INPUT ###########################################
points_training = 1000
points_test     = 150

# symbolic regressor parameters
population_size       = 10000
generations           = 30
tournament_size       = 100
function_set          = ('add', 'sub', 'mul', 'div', exp, log, pow)
metric                = 'mse'
init_depth            = (2, 8)
n_jobs                = 1
def _ts_argmax(data):
    window=10
    value = pd.Series(data.flatten()).rolling(10).apply(np.argmax) + 1 
    value = np.nan_to_num(value)
    
    return value

def _ts_argmin(data):
    window=10
    value = pd.Series(data.flatten()).rolling(10).apply(np.argmin) + 1 
    value = np.nan_to_num(value)
    
    return value

# Group of make_function wrappers: every helper below is registered under its
# own name with arity 1.
delta = make_function(function=_delta, name='delta', arity=1)
delay = make_function(function=_delay, name='delay', arity=1)
rank = make_function(function=_rank, name='rank', arity=1)
scale = make_function(function=_scale, name='scale', arity=1)
sma = make_function(function=_sma, name='sma', arity=1)
stddev = make_function(function=_stddev, name='stddev', arity=1)
product = make_function(function=_product, name='product', arity=1)
ts_rank = make_function(function=_ts_rank, name='ts_rank', arity=1)
ts_min = make_function(function=_ts_min, name='ts_min', arity=1)
ts_max = make_function(function=_ts_max, name='ts_max', arity=1)
ts_argmax = make_function(function=_ts_argmax, name='ts_argmax', arity=1)
ts_argmin = make_function(function=_ts_argmin, name='ts_argmin', arity=1)
ts_sum = make_function(function=_ts_sum, name='ts_sum', arity=1)

# Functions referenced by string name (presumably resolved by the library to
# its built-in implementations -- confirm against the consumer).
init_function = ['add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv', 'max', 'min', 'sin', 'cos', 'tan']
# The custom function objects created by the make_function calls in this group.
user_function = [delta, delay, rank, scale, sma, stddev, product, ts_rank, ts_min, ts_max, ts_argmax, ts_argmin, ts_sum]
Example #28
0
def pd_col_genetic_transform(df=None, col=None, pars=None):
    """Build symbolic (genetic-programming) features with gplearn.

    At training time a ``SymbolicTransformer`` is fitted on ``df[col]``; at
    inference time a previously exported transformer is reloaded instead. In
    both cases the transformer output is returned as a new DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; only the columns named in ``col`` are used. Missing
        values are forward-filled before fitting / transforming.
    col : list
        Names of the feature columns to transform.
    pars : dict, optional
        Keys read by this function:

        - ``'path_pipeline'``: when present, inference mode. The model and
          its saved parameters are loaded from this folder.
        - ``'dfy'``: training target, required when ``'path_pipeline'`` is
          absent.
        - ``'function_set'``: optional replacement for the default function
          set.
        - ``'pars_genetic'``: optional keyword arguments for
          ``SymbolicTransformer``; these take precedence over the defaults
          computed here (``hall_of_fame``, ``n_components``,
          ``feature_names``, ``function_set``).
        - ``'path_features_store'`` and ``'path_pipeline_export'``: when both
          are present, the features, model, column list, parameters and
          formulae are exported.

    Returns
    -------
    tuple
        ``(df_genetic, col_pars)``: the generated features (indexed like the
        input) and a dict with ``'prefix'``, ``'path'`` and ``'cols_new'``.
    """
    prefix = 'col_genetic'
    ######################################################################################
    from gplearn.genetic import SymbolicTransformer
    from gplearn.functions import make_function
    import random

    # Tolerate the declared default so .get() / "in" below do not crash.
    pars = {} if pars is None else pars

    colX = col  # [col_ for col_ in col if col_ not in coly]
    # DataFrame.ffill() is the non-deprecated spelling of fillna(method='ffill').
    train_X = df[colX].ffill()
    feature_name_ = colX

    def squaree(x):
        return x * x

    square_ = make_function(function=squaree, name='square_', arity=1)

    function_set = pars.get('function_set', [
        'add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv', 'tan',
        square_
    ])
    pars_genetic = pars.get(
        'pars_genetic',
        {
            'generations': 5,
            'population_size': 10,  ### Higher than nb_features
            'metric': 'spearman',
            'tournament_size': 20,
            'stopping_criteria': 1.0,
            'const_range': (-1., 1.),
            'p_crossover': 0.9,
            'p_subtree_mutation': 0.01,
            'p_hoist_mutation': 0.01,
            'p_point_mutation': 0.01,
            'p_point_replace': 0.05,
            'parsimony_coefficient': 0.005,  ####   0.00005 Control Complexity
            'max_samples': 0.9,
            'verbose': 1,

            #'n_components'      ### Control number of outtput features  : n_components
            'random_state': 0,
            'n_jobs': 4,
        })

    if 'path_pipeline' in pars:  #### Inference time
        gp = load(pars['path_pipeline'] + f"/{prefix}_model.pkl")
        # Keep the caller's `pars` intact: it is still needed below for the
        # export check and for col_pars['path'].
        pars_saved = load(pars['path_pipeline'] + f"/{prefix}_pars.pkl")
        if isinstance(pars_saved, dict):
            # Report the settings the loaded model was trained with.
            pars_genetic = pars_saved.get('pars_genetic', pars_genetic)
            function_set = pars_saved.get('function_set', function_set)
    else:  ### Training time
        train_y = pars['dfy']
        # NOTE(review): gplearn appears to validate hall_of_fame against
        # population_size and n_components; this default may be rejected for
        # wide inputs (original comment: "Buggy") -- confirm.
        gp_kwargs = {
            'hall_of_fame': train_X.shape[1] + 1,
            'n_components': train_X.shape[1],
            'feature_names': feature_name_,
            'function_set': function_set,
        }
        # Merging (instead of passing both explicit keywords and
        # **pars_genetic) lets pars_genetic override a default without a
        # duplicate-keyword TypeError.
        gp_kwargs.update(pars_genetic)
        gp = SymbolicTransformer(**gp_kwargs)
        gp.fit(train_X, train_y)

    ##### Transform Data  #########################################
    df_genetic = gp.transform(train_X)
    # NOTE(review): only 11 possible values, so the tag is not guaranteed to
    # be unique and differs between calls.
    tag = random.randint(0, 10)  #### UNIQUE TAG
    col_genetic = [f"gen_{tag}_{i}" for i in range(df_genetic.shape[1])]
    df_genetic = pd.DataFrame(df_genetic,
                              columns=col_genetic,
                              index=train_X.index)
    pars_gen_all = {'pars_genetic': pars_genetic, 'function_set': function_set}

    ##### Formulae Exrraction #####################################
    # str(gp) is split on ",\n" after stripping the square brackets; one
    # fragment is assigned to each generated column, in order.
    formula = str(gp).replace("[", "").replace("]", "")
    flist = formula.split(",\n")
    form_dict = {x: flist[i] for i, x in enumerate(col_genetic)}
    pars_gen_all['formulae_dict'] = form_dict
    log("########## Formulae ", form_dict)
    # col_pars['map_dict'] = dict(zip(train_X.columns.to_list(), feature_name_))

    col_new = col_genetic

    ###################################################################################
    if 'path_features_store' in pars and 'path_pipeline_export' in pars:
        save_features(df_genetic, 'df_genetic', pars['path_features_store'])
        save(gp, pars['path_pipeline_export'] + f"/{prefix}_model.pkl")
        save(col_genetic, pars['path_pipeline_export'] + f"/{prefix}.pkl")
        save(pars_gen_all,
             pars['path_pipeline_export'] + f"/{prefix}_pars.pkl")
        # save(form_dict,      pars['path_pipeline_export'] + f"/{prefix}_formula.pkl")
        save_json(form_dict, pars['path_pipeline_export'] +
                  f"/{prefix}_formula.json")  ### Human readable

    col_pars = {
        'prefix': prefix,
        'path': pars.get('path_pipeline_export',
                         pars.get('path_pipeline', None))
    }
    col_pars['cols_new'] = {
        prefix: col_new  ### list
    }
    return df_genetic, col_pars

##TRAINING AND PREDICTING WITH GPLEARN
def tanh(x):
    """Return the hyperbolic tangent of ``x`` computed by ``np.tanh``."""
    result = np.tanh(x)
    return result


def sinh(x):
    """Return the hyperbolic sine of ``x`` computed by ``np.sinh``."""
    result = np.sinh(x)
    return result


def cosh(x):
    """Return the hyperbolic cosine of ``x`` computed by ``np.cosh``."""
    result = np.cosh(x)
    return result


# Wrap the hyperbolic helpers (arity 1) with make_function.
# Arguments are passed by keyword: this works on every gplearn version,
# whereas positional arguments are rejected by releases that declare
# make_function's parameters as keyword-only.
gp_tanh = make_function(function=tanh, name="tanh", arity=1)
gp_sinh = make_function(function=sinh, name="sinh", arity=1)
gp_cosh = make_function(function=cosh, name="cosh", arity=1)

est_gp = SymbolicRegressor(
    population_size=20000,
    tournament_size=500,
    generations=1,
    stopping_criteria=0.0,
    p_crossover=0.9,
    p_subtree_mutation=0.0001,
    p_hoist_mutation=0.0001,
    p_point_mutation=0.0001,
    max_samples=1.0,
    verbose=1,
    function_set=('add', 'sub', 'mul', 'div', gp_tanh, 'sqrt', 'log', 'abs',