Example #1
def test_subsample():
    """Check that subsample work and that results differ"""

    est1 = SymbolicRegressor(max_samples=1.0, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    est1 = mean_absolute_error(est1.predict(boston.data[400:, :]),
                               boston.target[400:])

    est2 = SymbolicRegressor(max_samples=0.7, random_state=0)
    est2.fit(boston.data[:400, :], boston.target[:400])
    est2 = mean_absolute_error(est2.predict(boston.data[400:, :]),
                               boston.target[400:])

    assert_true(abs(est1 - est2) > 0.01)
Example #2
def test_verbose_output():
    """Check verbose=1 does not cause error"""

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    true_header = '    |{:^25}|{:^42}|'.format('Population Average',
                                               'Best Individual')
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()

    line_format = '{:>4} {:>8} {:>16} {:>8} {:>16} {:>16} {:>10}'
    true_header = line_format.format('Gen', 'Length', 'Fitness', 'Length',
                                     'Fitness', 'OOB Fitness', 'Time Left')
    assert_equal(true_header, header3)

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(20, n_lines)
Example #3
def test_trigonometric():
    """Check that using trig functions work and that results differ"""

    est1 = SymbolicRegressor(random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    est1 = mean_absolute_error(est1.predict(boston.data[400:, :]),
                               boston.target[400:])

    est2 = SymbolicRegressor(
        function_set=['add', 'sub', 'mul', 'div', 'sin', 'cos', 'tan'],
        random_state=0)
    est2.fit(boston.data[:400, :], boston.target[:400])
    est2 = mean_absolute_error(est2.predict(boston.data[400:, :]),
                               boston.target[400:])

    assert_true(abs(est1 - est2) > 0.01)
Example #4
def test_parallel_train():
    """Check predictions are the same for different n_jobs"""

    # Check the regressor
    ests = [
        SymbolicRegressor(population_size=100, generations=4, n_jobs=n_jobs,
                          random_state=0).fit(boston.data[:100, :],
                                              boston.target[:100])
        for n_jobs in [1, 2, 3, 8, 16]
    ]

    preds = [e.predict(boston.data[500:, :]) for e in ests]
    for pred1, pred2 in zip(preds, preds[1:]):
        assert_array_almost_equal(pred1, pred2)
    lengths = np.array([[gp.length_ for gp in e._programs[-1]] for e in ests])
    for len1, len2 in zip(lengths, lengths[1:]):
        assert_array_almost_equal(len1, len2)

    # Check the transformer
    ests = [
        SymbolicTransformer(population_size=100, hall_of_fame=50,
                            generations=4, n_jobs=n_jobs,
                            random_state=0).fit(boston.data[:100, :],
                                                boston.target[:100])
        for n_jobs in [1, 2, 3, 8, 16]
    ]

    preds = [e.transform(boston.data[500:, :]) for e in ests]
    for pred1, pred2 in zip(preds, preds[1:]):
        assert_array_almost_equal(pred1, pred2)
    lengths = np.array([[gp.length_ for gp in e._programs[-1]] for e in ests])
    for len1, len2 in zip(lengths, lengths[1:]):
        assert_array_almost_equal(len1, len2)
Example #5
def test_more_verbose_output():
    """Check verbose=2 does not cause error"""

    old_stdout = sys.stdout
    old_stderr = sys.stderr
    sys.stdout = StringIO()
    sys.stderr = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=2)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    joblib_output = sys.stderr
    sys.stdout = old_stdout
    sys.stderr = old_stderr

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    header2 = verbose_output.readline().rstrip()
    header3 = verbose_output.readline().rstrip()

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(20, n_lines)

    joblib_output.seek(0)
    n_lines = sum(1 for l in joblib_output.readlines())
    # Newer versions of joblib may or may not write progress to sys.stderr,
    # so only require the line count to be a multiple of ten
    assert_equal(0, n_lines % 10)
Example #6
def _gp_fit(arg):
    param, X, Y = arg
    est_gp = SymbolicRegressor(
        population_size=param[0],
        generations=450,
        parsimony_coefficient=param[1],
        function_set=param[2].split(" "),
        const_range=(-param[3], param[3]),
    )

    training, validation = splitidx_srs(len(Y))
    X_train = X[training]
    Y_train = Y[training]
    X_validation = X[validation]
    Y_validation = Y[validation]

    try:
        est_gp.fit(X_train, Y_train)
        return (
            param,
            str(est_gp._program),
            est_gp._program.raw_fitness_,
            regression_measures(est_gp.predict(X_validation), Y_validation),
        )
    except Exception as e:
        return (param, "Exception: {}".format(str(e)), 999999999)
Example #7
def test_parsimony_coefficient():
    """Check that parsimony coefficients work and that results differ"""

    est1 = SymbolicRegressor(population_size=100, generations=2,
                             parsimony_coefficient=0.001, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    est1 = mean_absolute_error(est1.predict(boston.data[400:, :]),
                               boston.target[400:])

    est2 = SymbolicRegressor(population_size=100, generations=2,
                             parsimony_coefficient='auto', random_state=0)
    est2.fit(boston.data[:400, :], boston.target[:400])
    est2 = mean_absolute_error(est2.predict(boston.data[400:, :]),
                               boston.target[400:])

    assert(abs(est1 - est2) > 0.01)
Example #8
def symbolic_regressor(f, npoints, xrange):
    X = np.linspace(xrange[0], xrange[1], npoints).reshape((-1, 1))
    y = f(X)

    est_gp = SymbolicRegressor(population_size=5000,
                               generations=20, stopping_criteria=0.01,
                               p_crossover=0.7, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               max_samples=0.9, verbose=1,
                               parsimony_coefficient=0.01, random_state=0)

    est_gp.fit(X, y)

    sym_expr = str(est_gp._program)

    converter = {
        'sub': lambda x, y: x - y,
        'div': lambda x, y: x / y,
        'mul': lambda x, y: x * y,
        'add': lambda x, y: x + y,
        'neg': lambda x: -x,
        'pow': lambda x, y: x ** y
    }

    x, X0 = symbols('x X0')
    sym_reg = simplify(sympify(sym_expr, locals=converter))
    sym_reg = sym_reg.subs(X0, x)

    Y_true = y.reshape((-1, 1))
    Y_est = np.array([sym_reg.subs(x, X[k, 0])
                      for k in range(len(X))]).reshape((-1, 1))

    R2_perf = compute_Rsquared(Y_true, Y_est)

    return sym_reg, R2_perf
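A usage sketch for the helper above; the target function and range are illustrative:

# Recover an expression approximating f(x) = x**2 + x on [-1, 1]
sym_reg, r2 = symbolic_regressor(lambda X: X ** 2 + X,
                                 npoints=100, xrange=(-1, 1))
print(sym_reg, r2)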
Example #9
def test_pipeline():
    """Check that SymbolicRegressor/Transformer can work in a pipeline"""

    # Check the regressor
    est = make_pipeline(StandardScaler(),
                        SymbolicRegressor(population_size=50,
                                          generations=5,
                                          tournament_size=5,
                                          random_state=0))
    est.fit(boston.data, boston.target)
    assert_almost_equal(est.score(boston.data, boston.target), -4.00270923)

    # Check the classifier
    est = make_pipeline(StandardScaler(),
                        SymbolicClassifier(population_size=50,
                                           generations=5,
                                           tournament_size=5,
                                           random_state=0))
    est.fit(cancer.data, cancer.target)
    assert_almost_equal(est.score(cancer.data, cancer.target), 0.934973637961)

    # Check the transformer
    est = make_pipeline(SymbolicTransformer(population_size=50,
                                            hall_of_fame=20,
                                            generations=5,
                                            tournament_size=5,
                                            random_state=0),
                        DecisionTreeRegressor())
    est.fit(boston.data, boston.target)
    assert_almost_equal(est.score(boston.data, boston.target), 1.0)
Example #10
def main():
    if len(sys.argv) < 2:
        print("Provide data file name!")
        sys.exit(1)

    filename = sys.argv[1]

    # Training samples
    x = read_nth_column(0, filename)
    x_train = np.array(x, dtype=float).reshape(-1, 1)
    # print(x_train)
    y = read_nth_column(1, filename)
    y_train = np.array(y, dtype=float)
    # print(y_train)

    # Testing samples
    X_test = read_nth_column(0, filename)
    y_test = read_nth_column(1, filename)

    est_gp = SymbolicRegressor(population_size=5000,
                               generations=30,
                               p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1,
                               verbose=1,
                               parsimony_coefficient=0.01,
                               random_state=0,
                               function_set=('add', 'sub', 'mul', 'div', 'sin',
                                             'cos', 'sqrt', 'log'))
    est_gp.fit(x_train, y_train)

    print(est_gp._program)
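`read_nth_column` is an external helper; a minimal sketch, assuming a whitespace-delimited text file:

def read_nth_column(n, filename):
    # Hypothetical helper: return column n of a whitespace-delimited file
    with open(filename) as fh:
        return [line.split()[n] for line in fh if line.strip()]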
Example #11
def getSymbolicRegressorModel():
    rng = check_random_state(0)

    # Training samples
    X_train = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_train = X_train[:, 0]**2 - X_train[:, 1]**2 + X_train[:, 1] - 1

    # Testing samples
    X_test = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_test = X_test[:, 0]**2 - X_test[:, 1]**2 + X_test[:, 1] - 1

    est_gp = SymbolicRegressor(
        population_size=5000,
        generations=20,
        stopping_criteria=0.01,
        p_crossover=0.7,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.1,
        max_samples=0.9,
        verbose=1,
        parsimony_coefficient=0.01,
        random_state=0,
    )
    est_gp.fit(X_train, y_train)
    return est_gp._program
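The `_Program` object returned above can be printed as an expression or exported as Graphviz DOT source via its `export_graphviz` method:

program = getSymbolicRegressorModel()
print(program)                        # e.g. sub(add(-0.999, X1), mul(sub(X1, X0), add(X0, X1)))
dot_data = program.export_graphviz()  # DOT source for rendering the program tree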
Example #12
def test_pickle():
    """Check picklability"""

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    score = est.score(boston.data[500:, :], boston.target[500:])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    score2 = est2.score(boston.data[500:, :], boston.target[500:])
    assert_equal(score, score2)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    X_new = est.transform(boston.data[500:, :])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    X_new2 = est2.transform(boston.data[500:, :])
    assert_array_almost_equal(X_new, X_new2)

    # Check the classifier
    est = SymbolicClassifier(generations=2, random_state=0)
    est.fit(cancer.data[:100, :], cancer.target[:100])
    score = est.score(cancer.data[500:, :], cancer.target[500:])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    score2 = est2.score(cancer.data[500:, :], cancer.target[500:])
    assert_equal(score, score2)
Example #13
def best_approximate(L, points):
    # L holds (y, x) pairs; drop the first and last points
    datax = [item[1] for item in L][1:-1]
    datay = [item[0] for item in L][1:-1]
    print(datax)
    print(datay)
    # No random_state is fixed here, so each call gives a different split
    X_train, X_test, y_train, y_test = train_test_split(datax, datay,
                                                        test_size=0.33)
    # gplearn expects a 2-D feature matrix
    X_train = np.reshape(X_train, (-1, 1))

    sr = SymbolicRegressor(
        population_size=500,
        generations=20,
        stopping_criteria=0.01,  # stop if the best solution's error drops below this
        function_set=('add', 'sub', 'mul', 'div'),  # primitives the regression can use
        p_crossover=0.54,        # probabilities of the genetic operators
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.3,
        verbose=1,               # print progress to screen
    )

    # launch the evolution
    sr.fit(X_train, y_train)
    Ypred = sr.predict(points)
    return Ypred
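A usage sketch, assuming `L` is a list of `(y, x)` pairs and `points` is a 2-D array of query inputs:

import numpy as np
L = [(xi ** 2, xi) for xi in np.linspace(0, 1, 20)]  # (y, x) pairs
points = np.linspace(0.1, 0.9, 5).reshape(-1, 1)
print(best_approximate(L, points))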
Example #14
def test_verbose_output():
    """Check verbose=1 does not cause error"""

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    true_header = '%4s|%-25s|%-42s|' % (' ', 'Population Average'.center(25),
                                        'Best Individual'.center(42))
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()
    header_fields = ('Gen', 'Length', 'Fitness', 'Length', 'Fitness',
                     'OOB Fitness', 'Time Left')
    true_header = '%4s %8s %16s %8s %16s %16s %10s' % header_fields
    assert_equal(true_header, header3)

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(20, n_lines)
Example #15
def test_low_memory():
    """Check the low_memory functionality works as expected."""

    est = SymbolicRegressor(generations=10, random_state=56, low_memory=True)
    # Check there are no parents
    est.fit(boston.data, boston.target)
    assert_true(est._programs[-2] is None)
Example #16
def run(dataset_train, dataset_test,
        population_size, generations, p_crossover, p_subtree_mutation, p_point_mutation, p_hoist_mutation):

    Xtrain, ytrain = dataset_train[:, :-1], dataset_train[:, -1]
    Xtest, ytest = dataset_test[:, :-1], dataset_test[:, -1]

    # myDiv, myLog, mySqrt, myTanh and myExp are custom protected functions
    # defined elsewhere in this project (see the make_function sketch below)
    f_set = ('add', 'sub', 'mul', myDiv, 'sin', 'cos', myLog, mySqrt, myTanh, myExp)

    est_gp = SymbolicRegressor(
        population_size=population_size,
        generations=generations,
        stopping_criteria=0.01,
        p_crossover=p_crossover,
        p_subtree_mutation=p_subtree_mutation,
        p_hoist_mutation=p_hoist_mutation,
        p_point_mutation=p_point_mutation,
        max_samples=1.0,
        verbose=0,
        parsimony_coefficient=0.05,
        function_set=f_set,
        n_jobs=1,
    )

    est_gp.fit(Xtrain, ytrain)

    # RMSE is an external helper computing root-mean-squared error
    return RMSE(est_gp.predict(Xtrain), ytrain), RMSE(est_gp.predict(Xtest), ytest)
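`myDiv`, `myLog`, `mySqrt`, `myTanh` and `myExp` are custom primitives defined elsewhere; a sketch of how such protected operators can be built with gplearn's `make_function` (the names and the zero-guard threshold here are assumptions):

import numpy as np
from gplearn.functions import make_function

def _protected_div(x1, x2):
    # Return 1.0 where the divisor is near zero so outputs stay finite
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.where(np.abs(x2) > 0.001, np.divide(x1, x2), 1.0)

myDiv = make_function(function=_protected_div, name='div', arity=2)
myTanh = make_function(function=np.tanh, name='tanh', arity=1)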
Example #17
def experiment(seed, i):
    # pop_size, num_generations, crossover_prob, mutation_prob, function_set,
    # fit, x and y are assumed to be defined at module level
    est_gp = SymbolicRegressor(population_size=pop_size,
                               generations=num_generations, stopping_criteria=0.01,
                               p_crossover=crossover_prob, p_subtree_mutation=mutation_prob,
                               p_hoist_mutation=mutation_prob, p_point_mutation=mutation_prob,
                               function_set=function_set,
                               max_samples=0.9, verbose=1,
                               metric=fit, random_state=seed)

    est_gp.fit(x, y)

    plt.figure(figsize=(14,5))
    plt.subplot(1,2,1)
    plt.xlabel('Generations', fontsize=24)
    plt.ylabel('Best fitness', fontsize=24)
    plt.plot(est_gp.run_details_['best_fitness'], linewidth=3.0)
    plt.grid()

    plt.subplot(1,2,2)
    plt.xlabel('Generations', fontsize=24)
    plt.ylabel('Best size', fontsize=24)
    plt.plot(est_gp.run_details_['best_length'], linewidth=3.0, color='red')
    plt.grid()

    plt.suptitle('Run {}'.format(i), fontsize=24)
    plt.savefig('plot_{}.eps'.format(seed))
    return est_gp.run_details_
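`fit`, used as the metric above, is a custom fitness measure built elsewhere; a sketch of how such a metric can be defined with gplearn's `make_fitness` (the RMSE choice is an assumption):

import numpy as np
from gplearn.fitness import make_fitness

def _rmse(y, y_pred, w):
    # Weighted root-mean-squared error; lower is better
    return np.sqrt(np.average((y_pred - y) ** 2, weights=w))

fit = make_fitness(function=_rmse, greater_is_better=False)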
Example #18
def runProgram(X_train, y_train, w):
    est_gp = SymbolicRegressor(population_size=5000,
                               generations=25, stopping_criteria=0.01,
                               p_crossover=0.65, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               max_samples=0.9, verbose=1,
                               function_set=['add', 'sub', 'mul', 'div', 'sqrt'],
                               parsimony_coefficient=0.01, random_state=1,
                               init_depth=(3, 6), tournament_size=10,
                               metric='mean absolute error')
    # Fit with the supplied per-sample weights and return the trained model
    est_gp.fit(X_train, y_train, sample_weight=w)
    return est_gp
Example #19
def test_sample_weight():
    """Check sample_weight param works"""

    # Check constant sample_weight has no effect
    sample_weight = np.ones(boston.target.shape[0])
    est1 = SymbolicRegressor(population_size=100, generations=2,
                             random_state=0)
    est1.fit(boston.data, boston.target)
    est2 = SymbolicRegressor(population_size=100, generations=2,
                             random_state=0)
    est2.fit(boston.data, boston.target, sample_weight=sample_weight)
    # And again with a scaled sample_weight
    est3 = SymbolicRegressor(population_size=100, generations=2,
                             random_state=0)
    est3.fit(boston.data, boston.target, sample_weight=sample_weight * 1.1)

    assert_almost_equal(est1._program.fitness_, est2._program.fitness_)
    assert_almost_equal(est1._program.fitness_, est3._program.fitness_)

    # And again for the classifier
    sample_weight = np.ones(cancer.target.shape[0])
    est1 = SymbolicClassifier(population_size=100, generations=2,
                              random_state=0)
    est1.fit(cancer.data, cancer.target)
    est2 = SymbolicClassifier(population_size=100, generations=2,
                              random_state=0)
    est2.fit(cancer.data, cancer.target, sample_weight=sample_weight)
    # And again with a scaled sample_weight
    est3 = SymbolicClassifier(population_size=100, generations=2,
                              random_state=0)
    est3.fit(cancer.data, cancer.target, sample_weight=sample_weight * 1.1)

    assert_almost_equal(est1._program.fitness_, est2._program.fitness_)
    assert_almost_equal(est1._program.fitness_, est3._program.fitness_)

    # And again for the transformer
    sample_weight = np.ones(boston.target.shape[0])
    est1 = SymbolicTransformer(population_size=100, generations=2,
                               random_state=0)
    est1 = est1.fit_transform(boston.data, boston.target)
    est2 = SymbolicTransformer(population_size=100, generations=2,
                               random_state=0)
    est2 = est2.fit_transform(boston.data, boston.target,
                              sample_weight=sample_weight)

    assert_array_almost_equal(est1, est2)
Example #20
def test_low_memory_warm_start():
    """Check the warm_start functionality works as expected with low_memory."""

    est = SymbolicRegressor(generations=20, random_state=415, low_memory=True)
    est.fit(boston.data, boston.target)
    cold_fitness = est._program.fitness_
    cold_program = str(est._program)

    # Check warm start with low memory gets the same result
    est = SymbolicRegressor(generations=10, random_state=415, low_memory=True)
    est.fit(boston.data, boston.target)
    est.set_params(generations=20, warm_start=True)
    est.fit(boston.data, boston.target)
    warm_fitness = est._program.fitness_
    warm_program = str(est._program)
    assert_almost_equal(cold_fitness, warm_fitness)
    assert_equal(cold_program, warm_program)
Example #21
def test_early_stopping():
    """Check that early stopping works"""

    est1 = SymbolicRegressor(stopping_criteria=10, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    assert_true(len(est1._programs) == 1)

    est1 = SymbolicTransformer(stopping_criteria=0.5, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    assert_true(len(est1._programs) == 1)
Example #22
def train(x, y_truth, X_train, y_train, X_test, y_test, target_func, noise_rate, noise_level):
    """
    x: sampling range of the target function
    y_truth: true values of the target function
    X_train: training inputs
    y_train: training targets (with noise)
    X_test: test inputs
    y_test: test targets
    noise_rate: noise rate
    noise_level: noise level
    Produces a fit trained on all the data; the fit may be degraded by the noisy samples.
    """
    # Inspect the training data
    print('--- training data ---')
    print(np.c_[X_train, y_train])
    # Define the symbolic regressor
    est_gp = SymbolicRegressor(population_size=5000,
                               function_set=['add', 'sub', 'mul', 'div'],  # optionally also 'sin', 'cos', 'sqrt', 'log', 'abs', 'neg', 'inv', 'tan'
                               generations=10, stopping_criteria=0.01,
                               p_crossover=0.7, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               max_samples=0.9, verbose=1,
                               metric='mean absolute error',
                               parsimony_coefficient=0.01, random_state=0,
                               const_range=(-1, 1))
    # Fit on the training set
    est_gp.fit(X_train.reshape(-1, 1), y_train)
    # Predict on the test set
    y_pred = est_gp.predict(X_test.reshape(-1, 1))
    # R^2 on the test set
    score_gp = est_gp.score(X_test.reshape(-1, 1), y_test)
    # Mean squared error on the test set
    test_mse = mean_squared_error(y_test, y_pred)
    print('Fitted program:', str(est_gp._program))
    print('R^2 : %.6f' % score_gp)
    print('MSE : %.6f' % test_mse)

    # Plot the target curve
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)
    plt.plot(x, y_truth, label=target_func)
    plt.legend(loc='best', fontsize=18)

    # Plot the noisy training data
    plt.scatter(X_train, y_train, label='NoisyData', alpha=0.9)
    plt.legend(loc='best', fontsize=18)

    # Plot the fitted curve (test points sorted by x)
    data = np.c_[X_test, y_pred]
    data = data[np.lexsort(data[:, ::-1].T)]
    plt.plot(data[:, 0], data[:, 1], label='GP : ' + str(est_gp._program))

    # Title
    fmt = r'$R^2 =\/ {0:.6f}$ , $MSE =\/ {1:.6f}$'.format(score_gp, test_mse)
    plt.title(fmt, fontproperties='SimHei', fontsize=20)
    plt.legend(loc='best', fontsize=18)
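A usage sketch for `train`, assuming a noisy sine target (the data generation here is illustrative):

import numpy as np
x = np.linspace(0, 2 * np.pi, 200)
y_truth = np.sin(x)
X_train = np.random.uniform(0, 2 * np.pi, 30)
y_train = np.sin(X_train) + np.random.normal(0.0, 0.2, 30)  # noisy targets
X_test = np.random.uniform(0, 2 * np.pi, 50)
y_test = np.sin(X_test)
train(x, y_truth, X_train, y_train, X_test, y_test,
      target_func=r'$\sin(x)$', noise_rate=0.15, noise_level=0.2)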
Example #23
def test_gridsearch():
    """Check that SymbolicRegressor can be grid-searched"""

    # Grid search parsimony_coefficient
    parameters = {'parsimony_coefficient': [0.001, 0.1, 'auto']}
    clf = SymbolicRegressor(population_size=50, generations=5,
                            tournament_size=5, random_state=0)
    grid = GridSearchCV(clf, parameters, scoring='neg_mean_absolute_error')
    grid.fit(boston.data, boston.target)
    expected = {'parsimony_coefficient': 0.001}
    assert_equal(grid.best_params_, expected)
Example #24
def test_low_memory():
    """Check the low_memory functionality works as expected."""
    est = SymbolicRegressor(generations=10, random_state=56, low_memory=True)

    # Check there are no parents of parents
    est.fit(boston.data, boston.target)
    idx = est._program.parents['parent_idx']
    assert_false(est._programs[-2][idx] is None)
    assert_true(est._programs[-2][idx].parents is None)

    # Check parents exist when low_memory is off
    est = SymbolicRegressor(generations=10, random_state=56, low_memory=False)
    est.fit(boston.data, boston.target)

    program = est._program
    assert_true(program is not None)
    for gen in np.arange(est.generations, 0, -1):
        assert_true(program.parents is not None)
        idx = program.parents['parent_idx']
        program = est._programs[gen - 1][idx]
        assert_true(program is not None)
Example #25
def regressionOfFailureRate(coords,
                            seed=None,
                            population_size=None,
                            generations=None):
    """
	Pokusí se co nejlépe proložit body \a coords vyjadřující četnost chyb.
	Snaží se při tom aby výsledek byl integrovatelný, ovšem integrovatelnost nezaručuje.
	"""
    if population_size is None:
        population_size = 1000
    if generations is None:
        generations = 20

    # Split the x and y coordinates into the format gplearn expects
    X_train, y_train = zip(*(([x], y) for (x, y) in coords))

    from gplearn.genetic import SymbolicRegressor
    # How many random constants will gplearn generate? There is no limit: each
    # node becomes a function, a variable, or a random constant from const_range.
    est_gp = SymbolicRegressor(  # Estimator Genetic Programming
        population_size=population_size,
        generations=1,
        tournament_size=20,
        stopping_criteria=0.0,
        const_range=(0.0, 5.0),
        init_depth=(2, 6),
        init_method='half and half',
        function_set=('add', 'mul'),
        metric='mean absolute error',  #metric=sum_absolute_error
        parsimony_coefficient=0.001,
        p_crossover=0.9,
        p_subtree_mutation=0.01,
        p_hoist_mutation=0.01,
        p_point_mutation=0.01,
        p_point_replace=0.05,
        max_samples=1.0,
        warm_start=False,
        n_jobs=-1,
        verbose=VERBOSITY,
        random_state=seed)
    est_gp.fit(X_train, y_train)
    for p in est_gp._programs[0]:
        # Rewrite every root node to division
        p.program[0] = gplearn.functions.div2
    for i in range(1, generations):
        for p in est_gp._programs[i - 1]:
            # Forbid crossover at the root for all offspring
            p.get_subtree = functools.partial(get_subtree, p)
        est_gp.set_params(generations=i + 1, warm_start=True)
        est_gp.fit(X_train, y_train)
    best_individual = est_gp._program
    return est_gp, extractExprFromGplearn(best_individual.program)
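`get_subtree` and `extractExprFromGplearn` are defined elsewhere in this project; a minimal sketch of a `get_subtree` replacement that never selects the root node, assuming gplearn's convention that the method returns the `(start, end)` bounds of a subtree in the flat program list (and that `gplearn.functions` is imported as in the example):

def get_subtree(program, random_state):
    # Hypothetical stand-in: pick any start node except index 0 (the root),
    # then walk the flat program list to find the end of that subtree
    start = random_state.randint(1, len(program.program))
    stack = 1
    end = start
    while stack > end - start:
        node = program.program[end]
        if isinstance(node, gplearn.functions._Function):
            stack += node.arity
        end += 1
    return start, end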
Example #26
def test_run_details():
    """Check the run_details_ attribute works as expected."""

    est = SymbolicRegressor(generations=5, random_state=415)
    est.fit(boston.data, boston.target)
    # Check generations are indexed as expected without warm_start
    assert_equal(est.run_details_['generation'], list(range(5)))
    est.set_params(generations=10, warm_start=True)
    est.fit(boston.data, boston.target)
    # Check generations are indexed as expected with warm_start
    assert_equal(est.run_details_['generation'], list(range(10)))
    # Check all details have expected number of elements
    for detail in est.run_details_:
        assert_equal(len(est.run_details_[detail]), 10)
Example #27
def train():
    # X_train and y_train are assumed to be defined at module level
    est_gp = SymbolicRegressor(population_size=150,
                               generations=20,
                               stopping_criteria=0.001,
                               p_crossover=0.8,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.05,
                               max_samples=0.9,
                               verbose=1,
                               metric='mean absolute error',
                               parsimony_coefficient=0.01)
    est_gp.fit(X_train, y_train)
    print(est_gp._program)
    print(est_gp.score(X_train, y_train))
Example #28
def test_input_shape():
    """Check changed dimensions cause failure"""

    random_state = check_random_state(415)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)
    X2 = np.reshape(random_state.uniform(size=45), (5, 9))

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)
    est.fit(X, y)
    assert_raises(ValueError, est.predict, X2)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)
    est.fit(X, y)
    assert_raises(ValueError, est.transform, X2)
Example #29
def RunSR(X_train, y_train, X_test, weights, SR_metric, sqrt, rand_state):
    operators_set = ['add', 'sub', 'mul', 'div']
    if sqrt == 1:
        operators_set.append('sqrt')
    SR = SymbolicRegressor(population_size=5000,
                           generations=20, stopping_criteria=0.01,
                           p_crossover=0.65, p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05, p_point_mutation=0.1,
                           max_samples=1, verbose=0, function_set=operators_set,
                           parsimony_coefficient=0.001, random_state=rand_state,
                           init_depth=(2, 6), tournament_size=20, metric=SR_metric)
    if weights > 0:
        # GenerateWeights is an external helper that builds per-sample weights
        weights_vector = GenerateWeights(X_train, weights)
        SR.fit(X_train, y_train, sample_weight=weights_vector)
    else:
        SR.fit(X_train, y_train)
    print(SR._program)
    return SR
Example #30
def test_early_stopping():
    """Check that early stopping works"""

    est1 = SymbolicRegressor(population_size=100, generations=2,
                             stopping_criteria=10, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    assert(len(est1._programs) == 1)

    est1 = SymbolicTransformer(population_size=100, generations=2,
                               stopping_criteria=0.5, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    assert(len(est1._programs) == 1)

    est1 = SymbolicClassifier(population_size=100, generations=2,
                              stopping_criteria=.9, random_state=0)
    est1.fit(cancer.data[:400, :], cancer.target[:400])
    assert(len(est1._programs) == 1)