Example #1
def test_verbose_output():
    """Check verbose=1 does not cause error"""

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    true_header = '%4s|%-25s|%-42s|' % (' ', 'Population Average'.center(25),
                                        'Best Individual'.center(42))
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()
    header_fields = ('Gen', 'Length', 'Fitness', 'Length', 'Fitness',
                     'OOB Fitness', 'Time Left')
    true_header = '%4s %8s %16s %8s %16s %16s %10s' % header_fields
    assert_equal(true_header, header3)

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(20, n_lines)
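Note: the test above swaps sys.stdout by hand and would leak the redirect if fit() raised. A minimal sketch of the same capture using the standard library's contextlib.redirect_stdout; the toy X/y below are placeholders, not the test's dataset:

from contextlib import redirect_stdout
from io import StringIO

import numpy as np
from gplearn.genetic import SymbolicRegressor

X = np.random.uniform(-1, 1, (50, 2))  # placeholder training data
y = X[:, 0] ** 2 + X[:, 1] ** 2

buf = StringIO()
with redirect_stdout(buf):  # stdout is restored even if fit() raises
    SymbolicRegressor(population_size=100, generations=2,
                      random_state=0, verbose=1).fit(X, y)
first_line = buf.getvalue().splitlines()[0]  # the run-log header line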
Example #2
def test_customized_regressor_metrics():
    """Check whether greater_is_better works for SymbolicRegressor."""

    x_data = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_true = x_data[:, 0] ** 2 + x_data[:, 1] ** 2

    est_gp = SymbolicRegressor(metric='mean absolute error',
                               stopping_criteria=0.000001, random_state=415,
                               parsimony_coefficient=0.001, init_method='full',
                               init_depth=(2, 4))
    est_gp.fit(x_data, y_true)
    formula = str(est_gp)
    assert_equal('add(mul(X1, X1), mul(X0, X0))', formula, True)

    def neg_mean_absolute_error(y, y_pred, sample_weight):
        return -1 * mean_absolute_error(y, y_pred, sample_weight=sample_weight)

    customized_fitness = make_fitness(neg_mean_absolute_error,
                                      greater_is_better=True)

    c_est_gp = SymbolicRegressor(metric=customized_fitness,
                                 stopping_criteria=-0.000001, random_state=415,
                                 parsimony_coefficient=0.001, verbose=0,
                                 init_method='full', init_depth=(2, 4))
    c_est_gp.fit(x_data, y_true)
    c_formula = str(c_est_gp)
    assert_equal('add(mul(X1, X1), mul(X0, X0))', c_formula, True)
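For reference, a minimal sketch of wrapping a different metric with make_fitness; the RMSE helper below is an illustration, not part of the test above. gplearn invokes the wrapped function as f(y, y_pred, sample_weight):

import numpy as np
from gplearn.fitness import make_fitness

def _rmse(y, y_pred, w):
    # weighted root mean squared error
    return np.sqrt(np.average((y_pred - y) ** 2, weights=w))

# RMSE is minimized, hence greater_is_better=False
rmse = make_fitness(function=_rmse, greater_is_better=False)
# usage: SymbolicRegressor(metric=rmse, ...)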
Example #3
def test_none_const_range():
    """Check that const_range=None produces no constants"""

    # Check with None as const_range
    est = SymbolicRegressor(population_size=100, generations=2,
                            const_range=None)
    est.fit(boston.data, boston.target)
    float_count = 0
    for generation in est._programs:
        for program in generation:
            if program is None:
                continue
            for element in program.program:
                if isinstance(element, float):
                    float_count += 1
    assert float_count == 0

    # Check with default const_range
    est = SymbolicRegressor(population_size=100, generations=2)
    est.fit(boston.data, boston.target)
    float_count = 0
    for generation in est._programs:
        for program in generation:
            if program is None:
                continue
            for element in program.program:
                if isinstance(element, float):
                    float_count += 1
    assert float_count > 1
Example #4
def _gp_fit(arg):
    param = arg[0]
    X = arg[1]
    Y = arg[2]
    est_gp = SymbolicRegressor(
        population_size=param[0],
        generations=450,
        parsimony_coefficient=param[1],
        function_set=param[2].split(" "),
        const_range=(-param[3], param[3]),
    )

    training, validation = splitidx_srs(len(Y))
    X_train = X[training]
    Y_train = Y[training]
    X_validation = X[validation]
    Y_validation = Y[validation]

    try:
        est_gp.fit(X_train, Y_train)
        return (
            param,
            str(est_gp._program),
            est_gp._program.raw_fitness_,
            regression_measures(est_gp.predict(X_validation), Y_validation),
        )
    except Exception as e:
        # pad with None so the tuple shape matches the success branch
        return (param, "Exception: {}".format(str(e)), 999999999, None)
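Since _gp_fit takes one packed tuple and returns its param alongside the results, it appears designed for a parallel map over a parameter grid; a hedged usage sketch in which param_grid, X and Y are assumptions:

from multiprocessing import Pool

# each param: [population_size, parsimony_coefficient, "add sub mul div", const_bound]
args = [(param, X, Y) for param in param_grid]  # param_grid, X, Y assumed to exist
with Pool() as pool:
    results = pool.map(_gp_fit, args)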
Example #5
def test_verbose_output():
    """Check verbose=1 does not cause error"""

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    true_header = '%4s|%-25s|%-42s|' % (' ', 'Population Average'.center(25),
                                        'Best Individual'.center(42))
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()
    header_fields = ('Gen', 'Length', 'Fitness', 'Length', 'Fitness',
                     'OOB Fitness', 'Time Left')
    true_header = '%4s %8s %16s %8s %16s %16s %10s' % header_fields
    assert_equal(true_header, header3)

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(10, n_lines)
Example #6
def experiment(seed, i):
    est_gp = SymbolicRegressor(population_size=pop_size,
                               generations=num_generations, stopping_criteria=0.01,
                               p_crossover=crossover_prob, p_subtree_mutation=mutation_prob,
                               p_hoist_mutation=mutation_prob, p_point_mutation=mutation_prob,
                               function_set=function_set,
                               max_samples=0.9, verbose=1,
                               metric=fit, random_state=seed)

    est_gp.fit(x, y)

    plt.figure(figsize=(14, 5))
    plt.subplot(1, 2, 1)
    plt.xlabel('Generations', fontsize=24)
    plt.ylabel('Best fitness', fontsize=24)
    plt.plot(est_gp.run_details_['best_fitness'], linewidth=3.0)
    plt.grid()

    plt.subplot(1, 2, 2)
    plt.xlabel('Generations', fontsize=24)
    plt.ylabel('Best size', fontsize=24)
    plt.plot(est_gp.run_details_['best_length'], linewidth=3.0, color='red')
    plt.grid()

    plt.suptitle('Run {}'.format(i), fontsize=24)
    plt.savefig('plot_{}.eps'.format(seed))
    return est_gp.run_details_
Example #7
def test_more_verbose_output():
    """Check verbose=2 does not cause error"""

    old_stdout = sys.stdout
    old_stderr = sys.stderr
    sys.stdout = StringIO()
    sys.stderr = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=2)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    joblib_output = sys.stderr
    sys.stdout = old_stdout
    sys.stderr = old_stderr

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    header2 = verbose_output.readline().rstrip()
    header3 = verbose_output.readline().rstrip()

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(10, n_lines)

    joblib_output.seek(0)
    n_lines = sum(1 for l in joblib_output.readlines())
    assert_equal(20, n_lines)
Example #8
def getSymbolicRegressorModel():
    rng = check_random_state(0)

    # Training samples
    X_train = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_train = X_train[:, 0]**2 - X_train[:, 1]**2 + X_train[:, 1] - 1

    # Testing samples
    X_test = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_test = X_test[:, 0]**2 - X_test[:, 1]**2 + X_test[:, 1] - 1

    est_gp = SymbolicRegressor(
        population_size=5000,
        generations=20,
        stopping_criteria=0.01,
        p_crossover=0.7,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.1,
        max_samples=0.9,
        verbose=1,
        parsimony_coefficient=0.01,
        random_state=0,
    )
    est_gp.fit(X_train, y_train)
    return est_gp._program
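The returned _program is gplearn's internal _Program; a sketch of what it exposes (attribute names follow gplearn's _Program, the printed formula is illustrative):

model = getSymbolicRegressorModel()
print(model)                   # LISP-style formula, e.g. sub(add(-0.999, X1), mul(X0, X0))
print(model.length_)           # number of nodes in the program tree
print(model.depth_)            # depth of the program tree
print(model.raw_fitness_)      # fitness on the in-sample training data
dot = model.export_graphviz()  # DOT source, e.g. for graphviz.Source(dot)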
Example #9
def test_verbose_output():
    """Check verbose=1 does not cause error"""

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    true_header = '    |{:^25}|{:^42}|'.format('Population Average',
                                               'Best Individual')
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()

    line_format = '{:>4} {:>8} {:>16} {:>8} {:>16} {:>16} {:>10}'
    true_header = line_format.format('Gen', 'Length', 'Fitness', 'Length',
                                     'Fitness', 'OOB Fitness', 'Time Left')
    assert_equal(true_header, header3)

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(20, n_lines)
Example #10
def test_more_verbose_output():
    """Check verbose=2 does not cause error"""

    old_stdout = sys.stdout
    old_stderr = sys.stderr
    sys.stdout = StringIO()
    sys.stderr = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=2)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    joblib_output = sys.stderr
    sys.stdout = old_stdout
    sys.stderr = old_stderr

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    header2 = verbose_output.readline().rstrip()
    header3 = verbose_output.readline().rstrip()

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(20, n_lines)

    joblib_output.seek(0)
    n_lines = sum(1 for l in joblib_output.readlines())
    # Newer joblib versions write a varying number of progress lines to stderr
    assert_equal(0, n_lines % 10)
Example #11
def main():
    if len(sys.argv) < 2:
        print("Provide data file name!")
        exit(1)

    filename = sys.argv[1]

    # Training samples
    x = read_nth_column(0, filename)
    x_train = np.asarray(x, dtype=float).reshape(-1, 1)
    # print(x_train)
    y = read_nth_column(1, filename)
    y_train = np.asarray(y, dtype=float)
    # print(y_train)

    # Testing samples
    X_test = read_nth_column(0, filename)
    y_test = read_nth_column(1, filename)

    est_gp = SymbolicRegressor(population_size=5000,
                               generations=30,
                               p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1,
                               verbose=1,
                               parsimony_coefficient=0.01,
                               random_state=0,
                               function_set=('add', 'sub', 'mul', 'div', 'sin',
                                             'cos', 'sqrt', 'log'))
    est_gp.fit(x_train, y_train)

    print(est_gp._program)
Example #12
def test_low_memory():
    """Check the low_memory functionality works as expected."""

    est = SymbolicRegressor(generations=10, random_state=56, low_memory=True)
    # Check there are no parents
    est.fit(boston.data, boston.target)
    assert_true(est._programs[-2] is None)
Example #13
def best_approximate(L, points):
    n = len(L)
    # drop the first and last points before fitting
    datax = [item[1] for item in L]
    datax.pop()
    datax.pop(0)
    datay = [item[0] for item in L]
    datay.pop()
    datay.pop(0)
    print(datax)
    print(datay)
    # pass random_state=<seed> here to fix the split and make results reproducible
    X_train, X_test, y_train, y_test = train_test_split(datax, datay,
                                                        test_size=0.33)

    sr = SymbolicRegressor(
        population_size=500,
        generations=20,
        stopping_criteria=0.01,   # stop once the best solution's error drops below this
        function_set=('add', 'sub', 'mul', 'div'),  # functions the regression may use
        p_crossover=0.54,         # probabilities of the different genetic operators
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.3,
        verbose=1,                # print progress to screen
    )

    # launch the evolution
    sr.fit(X_train, y_train)
    Ypred = sr.predict(points)
    return Ypred
Example #14
def test_none_const_range():
    """Check that const_range=None produces no constants"""

    # Check with None as const_range
    est = SymbolicRegressor(const_range=None, generations=2)
    est.fit(boston.data, boston.target)
    float_count = 0
    for generation in est._programs:
        for program in generation:
            if program is None:
                continue
            for element in program.program:
                if isinstance(element, float):
                    float_count += 1
    assert_true(float_count == 0)

    # Check with default const_range
    est = SymbolicRegressor(generations=2)
    est.fit(boston.data, boston.target)
    float_count = 0
    for generation in est._programs:
        for program in generation:
            if program is None:
                continue
            for element in program.program:
                if isinstance(element, float):
                    float_count += 1
    assert_true(float_count > 1)
Example #15
def run(dataset_train, dataset_test,
        population_size, generations, p_crossover, p_subtree_mutation, p_point_mutation, p_hoist_mutation):

    Xtrain, ytrain = dataset_train[:, :-1], dataset_train[:, -1]
    Xtest,  ytest  = dataset_test[:, :-1],  dataset_test[:, -1]
    
    f_set = ('add', 'sub', 'mul', myDiv, 'sin', 'cos', myLog, mySqrt, myTanh, myExp) 

    est_gp = SymbolicRegressor(
        population_size=population_size,
        generations=generations,
        stopping_criteria=0.01,
        p_crossover=p_crossover,
        p_subtree_mutation=p_subtree_mutation,
        p_hoist_mutation=p_hoist_mutation,
        p_point_mutation=p_point_mutation,
        max_samples=1.0,
        verbose=0,
        parsimony_coefficient=0.05,
        function_set=f_set,
        n_jobs=1
    )
    
    est_gp.fit(Xtrain, ytrain)
    
    return RMSE(est_gp.predict(Xtrain), ytrain), RMSE(est_gp.predict(Xtest), ytest)
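The custom operators myDiv, myLog, mySqrt, myTanh and myExp are defined elsewhere; a plausible reconstruction of such protected operators with gplearn.functions.make_function (an assumed sketch, not the original definitions):

import numpy as np
from gplearn.functions import make_function

def _protected_div(x1, x2):
    # like gplearn's built-in 'div': return 1 where the divisor is near zero
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.where(np.abs(x2) > 0.001, np.divide(x1, x2), 1.)

def _protected_exp(x1):
    # clip the argument so exp() cannot overflow
    return np.exp(np.clip(x1, -100., 100.))

def _tanh(x1):
    return np.tanh(x1)

myDiv = make_function(function=_protected_div, name='div', arity=2)
myExp = make_function(function=_protected_exp, name='exp', arity=1)
myTanh = make_function(function=_tanh, name='tanh', arity=1)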
Example #16
def symbolic_regressor(f, npoints, xrange):
    X = np.linspace(xrange[0], xrange[1], npoints).reshape((-1, 1))
    y = f(X)

    est_gp = SymbolicRegressor(population_size=5000,
                               generations=20, stopping_criteria=0.01,
                               p_crossover=0.7, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               max_samples=0.9, verbose=1,
                               parsimony_coefficient=0.01, random_state=0)

    est_gp.fit(X, y)

    sym_expr = str(est_gp._program)

    converter = {
        'sub': lambda x, y: x - y,
        'div': lambda x, y: x / y,
        'mul': lambda x, y: x * y,
        'add': lambda x, y: x + y,
        'neg': lambda x: -x,
        'pow': lambda x, y: x ** y
    }

    x, X0 = symbols('x X0')
    sym_reg = simplify(sympify(sym_expr, locals=converter))
    sym_reg = sym_reg.subs(X0, x)

    Y_true = y.reshape((-1, 1))
    Y_est = np.array([float(sym_reg.subs(x, X[k, 0]))
                      for k in range(len(X))]).reshape((-1, 1))

    R2_perf = compute_Rsquared(Y_true, Y_est)

    return sym_reg, R2_perf
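Evaluating sym_reg point by point with subs is slow; under the same assumptions (x, sym_reg and X as above), the evaluation can be vectorized with sympy.lambdify, a sketch:

from sympy import lambdify

f = lambdify(x, sym_reg, modules='numpy')  # compile the expression to a NumPy function
Y_est = np.asarray(f(X.ravel()), dtype=float).reshape(-1, 1)  # note: a constant expression yields a scalar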
Example #17
def train():
    est_gp = SymbolicRegressor(population_size=150,
                               generations=20, stopping_criteria=0.001,
                               p_crossover=0.8, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.05,
                               max_samples=0.9, verbose=1, metric='mean absolute error',
                               parsimony_coefficient=0.01)
    est_gp.fit(X_train, y_train)
    print(est_gp._program)
    print(est_gp.score(X_train, y_train))
Example #18
def test_early_stopping():
    """Check that early stopping works"""

    est1 = SymbolicRegressor(stopping_criteria=10, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    assert_true(len(est1._programs) == 1)

    est1 = SymbolicTransformer(stopping_criteria=0.5, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    assert_true(len(est1._programs) == 1)
Example #19
def test_early_stopping():
    """Check that early stopping works"""

    est1 = SymbolicRegressor(stopping_criteria=10, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    assert_true(len(est1._programs) == 1)

    est1 = SymbolicTransformer(stopping_criteria=0.5, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    assert_true(len(est1._programs) == 1)
Example #20
def train(x, y_truth, X_train, y_train, X_test, y_test, target_func, noise_rate, noise_level):
    """
    x: range over which the target function is sampled
    y_truth: true values of the target function
    X_train: training data
    y_train: training targets (with added noise)
    X_test: test data
    y_test: test targets
    noise_rate: fraction of noisy samples
    noise_level: magnitude of the noise
    Fits on all of the training data; the fit may be degraded by the noisy samples.
    """
    # inspect the training data
    print('--- training data ---')
    print(np.c_[X_train, y_train])
    # define the symbolic regressor
    est_gp = SymbolicRegressor(population_size=5000,
                               function_set=['add', 'sub', 'mul', 'div'],  # optionally: 'sin', 'cos', 'sqrt', 'log', 'abs', 'neg', 'inv', 'tan'
                               generations=10, stopping_criteria=0.01,
                               p_crossover=0.7, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               max_samples=0.9, verbose=1, metric='mean absolute error',
                               parsimony_coefficient=0.01, random_state=0, const_range=(-1, 1))
    # fit on the training set
    est_gp.fit(X_train.reshape(-1, 1), y_train)
    # predict on the test data
    y_pred = est_gp.predict(X_test.reshape(-1, 1))
    # R^2 on the test set
    score_gp = est_gp.score(X_test.reshape(-1, 1), y_test)
    # mean squared error on the test set
    test_mse = mean_squared_error(y_test, y_pred)
    print('Fitted program:', str(est_gp._program))
    print('R^2 : %.6f' % score_gp)
    print('MSE : %.6f' % test_mse)

    # plot the target curve
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)
    plt.plot(x, y_truth, label=target_func)
    plt.legend(loc='best', fontsize=18)

    # plot the noisy training data
    plt.scatter(X_train, y_train, label='NoisyData', alpha=0.9)
    plt.legend(loc='best', fontsize=18)

    # plot the fitted curve
    data = np.c_[X_test, y_pred]
    data = data[np.lexsort(data[:, ::-1].T)]
    plt.plot(data[:, 0], data[:, 1], label='GP : ' + str(est_gp._program))

    # figure title
    fmt = '$R^2 =\/ {0:.6f}$ , $MSE =\/ {1:.6f}$'.format(score_gp, test_mse)
    plt.title(fmt, fontsize=20)
    plt.legend(loc='best', fontsize=18)
Example #21
def regressionOfFailureRate(coords,
                            seed=None,
                            population_size=None,
                            generations=None):
    """
    Tries to fit the points \a coords, which express a failure rate, as well as possible.
    It aims for the result to be integrable, though integrability is not guaranteed.
    """
    if population_size is None:
        population_size = 1000
    if generations is None:
        generations = 20

    # Split the x and y coordinates into the shape gplearn expects
    X_train, y_train = zip(*(([x], y) for (x, y) in coords))

    from gplearn.genetic import SymbolicRegressor
    # How many random constants will gplearn generate? It is not bounded: each node
    # becomes a function, a variable, or a random constant from the given interval.
    est_gp = SymbolicRegressor(  # Estimator Genetic Programming
        population_size=population_size,
        generations=1,
        tournament_size=20,
        stopping_criteria=0.0,
        const_range=(0.0, 5.0),
        init_depth=(2, 6),
        init_method='half and half',
        function_set=('add', 'mul'),
        metric='mean absolute error',  #metric=sum_absolute_error
        parsimony_coefficient=0.001,
        p_crossover=0.9,
        p_subtree_mutation=0.01,
        p_hoist_mutation=0.01,
        p_point_mutation=0.01,
        p_point_replace=0.05,
        max_samples=1.0,
        warm_start=False,
        n_jobs=-1,
        verbose=VERBOSITY,
        random_state=seed)
    est_gp.fit(X_train, y_train)
    for p in est_gp._programs[0]:
        p.program[0] = gplearn.functions.div2  # rewrite every root node to a division
    for i in range(1, generations):
        for p in est_gp._programs[i - 1]:
            p.get_subtree = functools.partial(
                get_subtree, p)  # forbid offspring from crossing over at the root
        est_gp.set_params(generations=i + 1, warm_start=True)
        est_gp.fit(X_train, y_train)
    best_individual = est_gp._program
    return est_gp, extractExprFromGplearn(best_individual.program)
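Aside from the root surgery, the loop above is gplearn's standard warm_start resume pattern: after set_params, each fit call evolves only the generations that were added. A stripped-down sketch with assumed X_train/y_train:

est = SymbolicRegressor(generations=1, random_state=0)
est.fit(X_train, y_train)                  # evolve the initial generation
for g in range(2, 21):
    est.set_params(generations=g, warm_start=True)
    est.fit(X_train, y_train)              # evolve exactly one more generation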
Example #22
def test_run_details():
    """Check the run_details_ attribute works as expected."""

    est = SymbolicRegressor(generations=5, random_state=415)
    est.fit(boston.data, boston.target)
    # Check generations are indexed as expected without warm_start
    assert_equal(est.run_details_['generation'], list(range(5)))
    est.set_params(generations=10, warm_start=True)
    est.fit(boston.data, boston.target)
    # Check generations are indexed as expected with warm_start
    assert_equal(est.run_details_['generation'], list(range(10)))
    # Check all details have expected number of elements
    for detail in est.run_details_:
        assert_equal(len(est.run_details_[detail]), 10)
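Since run_details_ is a dict of equal-length per-generation lists, it tabulates directly; a small sketch (pandas is an assumption of this note, not of the test):

import pandas as pd

history = pd.DataFrame(est.run_details_)  # one row per generation
print(history[['generation', 'best_fitness', 'best_length']].tail())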
Example #23
def test_subsample():
    """Check that subsample work and that results differ"""

    est1 = SymbolicRegressor(max_samples=1.0, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    est1 = mean_absolute_error(est1.predict(boston.data[400:, :]),
                               boston.target[400:])

    est2 = SymbolicRegressor(max_samples=0.7, random_state=0)
    est2.fit(boston.data[:400, :], boston.target[:400])
    est2 = mean_absolute_error(est2.predict(boston.data[400:, :]),
                               boston.target[400:])

    assert_true(abs(est1 - est2) > 0.01)
Example #24
def test_subsample():
    """Check that subsample work and that results differ"""

    est1 = SymbolicRegressor(max_samples=1.0, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    est1 = mean_absolute_error(est1.predict(boston.data[400:, :]),
                               boston.target[400:])

    est2 = SymbolicRegressor(max_samples=0.7, random_state=0)
    est2.fit(boston.data[:400, :], boston.target[:400])
    est2 = mean_absolute_error(est2.predict(boston.data[400:, :]),
                               boston.target[400:])

    assert_true(abs(est1 - est2) > 0.01)
Example #25
def test_trigonometric():
    """Check that using trig functions work and that results differ"""

    est1 = SymbolicRegressor(random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    est1 = mean_absolute_error(est1.predict(boston.data[400:, :]),
                               boston.target[400:])

    est2 = SymbolicRegressor(trigonometric=True, random_state=0)
    est2.fit(boston.data[:400, :], boston.target[:400])
    est2 = mean_absolute_error(est2.predict(boston.data[400:, :]),
                               boston.target[400:])

    assert_true(abs(est1 - est2) > 0.01)
Example #26
def train():
    est_gp = SymbolicRegressor(population_size=150,
                               generations=20,
                               stopping_criteria=0.001,
                               p_crossover=0.8,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.05,
                               max_samples=0.9,
                               verbose=1,
                               metric='mean absolute error',
                               parsimony_coefficient=0.01)
    est_gp.fit(X_train, y_train)
    print(est_gp._program)
    print(est_gp.score(X_train, y_train))
Example #27
def test_parsimony_coefficient():
    """Check that parsimony coefficients work and that results differ"""

    est1 = SymbolicRegressor(population_size=100, generations=2,
                             parsimony_coefficient=0.001, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    est1 = mean_absolute_error(est1.predict(boston.data[400:, :]),
                               boston.target[400:])

    est2 = SymbolicRegressor(population_size=100, generations=2,
                             parsimony_coefficient='auto', random_state=0)
    est2.fit(boston.data[:400, :], boston.target[:400])
    est2 = mean_absolute_error(est2.predict(boston.data[400:, :]),
                               boston.target[400:])

    assert(abs(est1 - est2) > 0.01)
Example #28
def test_trigonometric():
    """Check that using trig functions work and that results differ"""

    est1 = SymbolicRegressor(random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    est1 = mean_absolute_error(est1.predict(boston.data[400:, :]),
                               boston.target[400:])

    est2 = SymbolicRegressor(
        function_set=['add', 'sub', 'mul', 'div', 'sin', 'cos', 'tan'],
        random_state=0)
    est2.fit(boston.data[:400, :], boston.target[:400])
    est2 = mean_absolute_error(est2.predict(boston.data[400:, :]),
                               boston.target[400:])

    assert_true(abs(est1 - est2) > 0.01)
Example #29
def test_input_shape():
    """Check changed dimensions cause failure"""

    random_state = check_random_state(415)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)
    X2 = np.reshape(random_state.uniform(size=45), (5, 9))

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)
    est.fit(X, y)
    assert_raises(ValueError, est.predict, X2)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)
    est.fit(X, y)
    assert_raises(ValueError, est.transform, X2)
Example #30
def RunSR(X_train, y_train, X_test, weights, SR_metric, sqrt, rand_state):
    operators_set=['add', 'sub', 'mul', 'div']
    if sqrt==1: operators_set = operators_set + ['sqrt']
    SR = SymbolicRegressor(population_size=5000,
                           generations=20, stopping_criteria=0.01,
                           p_crossover=0.65, p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05, p_point_mutation=0.1,
                           max_samples=1, verbose=0, function_set = operators_set,
                           parsimony_coefficient=0.001, random_state=rand_state, init_depth=(2,6),
                           tournament_size=20,metric = SR_metric)
    if weights>0:
        weights_vector = GenerateWeights(X_train, weights)
        SR.fit(X_train, y_train, weights_vector)
    else:
        SR.fit(X_train, y_train)
    print(SR._program)
    return SR
Example #31
def test_input_shape():
    """Check changed dimensions cause failure"""

    random_state = check_random_state(415)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)
    X2 = np.reshape(random_state.uniform(size=45), (5, 9))

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)
    est.fit(X, y)
    assert_raises(ValueError, est.predict, X2)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)
    est.fit(X, y)
    assert_raises(ValueError, est.transform, X2)
Example #32
def main():
    x = np.genfromtxt('x_train.csv', delimiter=',').reshape((1000, 1))
    y = np.genfromtxt('y_train.csv', delimiter=',')
    est_gp = SymbolicRegressor(population_size=50,
                               generations=20,
                               stopping_criteria=0.01,
                               p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1,
                               max_samples=0.9,
                               verbose=1,
                               parsimony_coefficient=0.01,
                               random_state=0)
    est_gp.fit(x, y)
    print(est_gp._program)

    est_tree = DecisionTreeRegressor()
    est_tree.fit(x, y)
    est_rf = RandomForestRegressor()
    est_rf.fit(x, y)

    x0 = np.arange(-1, 1, 1 / 10.)
    x1 = np.arange(-1, 1, 1 / 10.)
    x0, x1 = np.meshgrid(x0, x1)
    y_truth = 3 * x0**2 + 5 * x0 + 1  # exact function we are estimating

    y_gp = est_gp.predict(np.c_[x0.ravel()]).reshape(x0.shape)
    score_gp = est_gp.score(x, y)
    y_tree = est_tree.predict(np.c_[x0.ravel()]).reshape(x0.shape)
    score_tree = est_tree.score(x, y)
    y_rf = est_rf.predict(np.c_[x0.ravel()]).reshape(x0.shape)
    score_rf = est_rf.score(x, y)

    for i, (ys, score,
            title) in enumerate([(y_truth, None, "Ground Truth"),
                                 (y_gp, score_gp, "SymbolicRegressor"),
                                 (y_tree, score_tree, "DecisionTreeRegressor"),
                                 (y_rf, score_rf, "RandomForestRegressor")]):
        plt.subplot(2, 2, i + 1)
        plt.plot(x0, ys, 'C0o')
        plt.grid(True, which='both')
        plt.axhline(y=0, color='k')
        plt.axvline(x=0, color='k')
    plt.show()
Example #33
def pca_gp(rows, features, function):
    run_results = {}
    for run_number in range(0, NUMBER_OF_RUNS):
        # Generating random data
        rng = check_random_state(run_number)
        X = rng.uniform(-1, 1, rows).reshape(rows // features, features)
        Y = function(X)

        # Dividing it into training and test set
        X_train, X_test, y_train, y_test = train_test_split(
            X, Y, test_size=TEST_SIZE, random_state=0)

        # Convert it to PCA
        pca = PCA(n_components=1)
        X_train_pca = pca.fit_transform(X_train)
        X_test_pca = pca.transform(X_test)

        # Training the system
        est_gp = SymbolicRegressor(population_size=POPULATION_SIZE,
                                   generations=NUMBER_OF_GENERATION,
                                   stopping_criteria=0.01,
                                   p_crossover=0.7,
                                   p_subtree_mutation=0.1,
                                   p_hoist_mutation=0.05,
                                   p_point_mutation=0.1,
                                   max_samples=0.9,
                                   verbose=1,
                                   parsimony_coefficient=0.01,
                                   random_state=0,
                                   n_jobs=-1)
        est_gp.fit(X_train_pca, y_train)
        generation_results = []
        for idGen in range(len(est_gp._programs)):
            single_generation = {}
            single_generation[idGen] = math.inf
            for idPopulation in range(est_gp.population_size):
                program = est_gp._programs[idGen][idPopulation]
                if program is not None:
                    if program.raw_fitness_ < single_generation[idGen]:
                        single_generation[idGen] = program.raw_fitness_
            generation_results.append(single_generation)
        run_results[run_number] = generation_results
    return run_results
Example #34
def main():
    data = read_data()
    regressor = SymbolicRegressor(population_size=1000,
                                  generations=100,
                                  const_range=(.0, .0),
                                  init_depth=(2, 10),
                                  init_method='grow',
                                  function_set=('add', 'sub', 'mul', 'div',
                                                'log', 'sin', 'cos'),
                                  p_crossover=0.7,
                                  p_subtree_mutation=0.0,
                                  p_hoist_mutation=0.0,
                                  p_point_mutation=0.0,
                                  verbose=1,
                                  n_jobs=-1)
    (n, _) = data.shape
    regressor.fit(data[:, 0].reshape(n, 1), data[:, 1])
    print(regressor._program)
Example #35
def test_verbose_with_oob():
    """Check oob scoring for subsample does not cause error"""

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(max_samples=0.9, random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    header2 = verbose_output.readline().rstrip()
    header3 = verbose_output.readline().rstrip()

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(20, n_lines)
Example #36
def test_verbose_with_oob():
    """Check oob scoring for subsample does not cause error"""

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(max_samples=0.9, random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    header2 = verbose_output.readline().rstrip()
    header3 = verbose_output.readline().rstrip()

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(10, n_lines)
Example #37
    def fit(self, x_data):
        est_gp = SymbolicRegressor(population_size=500,
                                   generations=10,
                                   stopping_criteria=0.0001,
                                   p_crossover=0.7,
                                   p_subtree_mutation=0.1,
                                   p_hoist_mutation=0.05,
                                   p_point_mutation=0.1,
                                   metric=make_fitness(
                                       self.make_explict_func(), False),
                                   function_set=self.function_set,
                                   verbose=1,
                                   parsimony_coefficient=0.01)

        # fit against the row indices; the custom metric supplies the real objective
        indices = np.arange(x_data.shape[0])
        est_gp.fit(x_data, indices)

        return est_gp
Example #38
def test_verbose_with_oob():
    """Check oob scoring for subsample does not cause error"""

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(population_size=100, generations=10,
                            max_samples=0.9, random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    # Ignore header rows
    _ = verbose_output.readline().rstrip()
    _ = verbose_output.readline().rstrip()
    _ = verbose_output.readline().rstrip()

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(10, n_lines)
Example #39
def test_sample_weight():
    """Check sample_weight param works"""

    # Check constant sample_weight has no effect
    sample_weight = np.ones(boston.target.shape[0])
    est1 = SymbolicRegressor(generations=2, random_state=0)
    est1.fit(boston.data, boston.target)
    est2 = SymbolicRegressor(generations=2, random_state=0)
    est2.fit(boston.data, boston.target, sample_weight=sample_weight)
    # And again with a scaled sample_weight
    est3 = SymbolicRegressor(generations=2, random_state=0)
    est3.fit(boston.data, boston.target, sample_weight=sample_weight * 1.1)

    assert_almost_equal(est1._program.fitness_, est2._program.fitness_)
    assert_almost_equal(est1._program.fitness_, est3._program.fitness_)

    # And again for the transformer
    sample_weight = np.ones(boston.target.shape[0])
    est1 = SymbolicTransformer(generations=2, random_state=0)
    est1 = est1.fit_transform(boston.data, boston.target)
    est2 = SymbolicTransformer(generations=2, random_state=0)
    est2 = est2.fit_transform(boston.data, boston.target,
                              sample_weight=sample_weight)
    # And again with a scaled sample_weight
    est3 = SymbolicTransformer(generations=2, random_state=0)
    est3 = est3.fit_transform(boston.data, boston.target,
                              sample_weight=sample_weight * 1.1)

    assert_array_almost_equal(est1, est2)
    assert_array_almost_equal(est1, est3)
Example #40
def test_pickle():
    """Check pickability"""

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    score = est.score(boston.data[500:, :], boston.target[500:])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    score2 = est2.score(boston.data[500:, :], boston.target[500:])
    assert_equal(score, score2)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    X_new = est.transform(boston.data[500:, :])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    X_new2 = est2.transform(boston.data[500:, :])
    assert_array_almost_equal(X_new, X_new2)
Example #41
def test_parsimony_coefficient():
    """Check that parsimony coefficients work and that results differ"""

    est1 = SymbolicRegressor(parsimony_coefficient=0.001, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    est1 = mean_absolute_error(est1.predict(boston.data[400:, :]),
                               boston.target[400:])

    est2 = SymbolicRegressor(parsimony_coefficient=0.1, random_state=0)
    est2.fit(boston.data[:400, :], boston.target[:400])
    est2 = mean_absolute_error(est2.predict(boston.data[400:, :]),
                               boston.target[400:])

    est3 = SymbolicRegressor(parsimony_coefficient='auto', random_state=0)
    est3.fit(boston.data[:400, :], boston.target[:400])
    est3 = mean_absolute_error(est3.predict(boston.data[400:, :]),
                               boston.target[400:])

    assert_true(abs(est1 - est2) > 0.01)
    assert_true(abs(est1 - est3) > 0.01)
    assert_true(abs(est2 - est3) > 0.01)
Example #42
def test_print_overloading_estimator():
    """Check that printing a fitted estimator results in 'pretty' output"""

    random_state = check_random_state(415)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)

    # Unfitted
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_unfitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    # Fitted
    est.fit(X, y)
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_fitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est._program)
        output_program = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    assert_true(output_unfitted != output_fitted)
    assert_true(output_unfitted == est.__repr__())
    assert_true(output_fitted == output_program)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)

    # Unfitted
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_unfitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    # Fitted
    est.fit(X, y)
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_fitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        output = str([gp.__str__() for gp in est])
        print(output.replace("',", ",\n").replace("'", ""))
        output_program = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    assert_true(output_unfitted != output_fitted)
    assert_true(output_unfitted == est.__repr__())
    assert_true(output_fitted == output_program)
Example #43
def main():
    escalation = {
        Position.GOALKEEPER: 1,
        Position.DEFENDER: 2,
        Position.SIDE: 2,
        Position.MIDFIELD: 4,
        Position.ATTACKER: 2,
        Position.COACH: 1
    }

    print("Getting auth")
    auth = get_auth()

    print("Getting teams")
    teams = get_teams()

    print("Getting athletes")
    athletes = get_athletes(teams)

    print("Getting scores")
    scores = [athlete.get_row(auth) for athlete in athletes]
    max_length = 0
    for score in scores:
        if len(score) > max_length:
            max_length = len(score)
    fixed_score = []
    for score in scores:
        fixed_score.append([0.0] * (max_length - len(score)) + score)

    generations = 2000
    print("Training using " + str(generations) + " generations. It can take a long time to end")
    est_gp = SymbolicRegressor(
        population_size=5000,
        generations=generations,
        stopping_criteria=0.01,
        p_crossover=0.7,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.1,
        max_samples=0.9,
        verbose=1,
        parsimony_coefficient=0.01,
        random_state=0,
        const_range=(-50., 50.),
        function_set=(
            'add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv', 'max', 'min', 'sin', 'cos', 'tan'))
    est_gp.fit([x[:-1] for x in fixed_score], [x[-1] for x in fixed_score])
    predictions = est_gp.predict([x[:-1] for x in fixed_score])

    print("Getting results")
    results = [[athlete, prediction] for athlete, prediction in zip(athletes, predictions)]
    results.sort(key=lambda x: -x[1])
    print("\"Scale\",\"Name\",\"Team\",\"Position\",\"Status\",\"Price\",\"Prediction\"")
    for result in results:
        athlete = result[0]
        prediction = result[1]
        scale = athlete.status == Status.Probable and escalation[athlete.position] > 0
        if scale:
            escalation[athlete.position] = escalation[athlete.position] - 1
        print("\"" +
              ("*" if scale else " ") + "\",\"" +
              athlete.nick + "\",\"" +
              athlete.club.name + "\",\"" +
              str(athlete.position.name) + "\",\"" +
              str(athlete.status.name) + "\"," +
              str(athlete.price) + "," +
              str(prediction))

    print("Done")