Example #1
def test_low_memory():
    """Check the low_memory functionality works as expected."""

    est = SymbolicRegressor(generations=10, random_state=56, low_memory=True)
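    # low_memory=True discards parent programs from earlier generations during fit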
    est.fit(boston.data, boston.target)
    # Check that no parents from the previous generation were retained
    assert_true(est._programs[-2] is None)
Example #2
def main():
    if len(sys.argv) < 2:
        print("Provide data file name!")
        exit(1)

    filename = sys.argv[1]

    # Training samples
    x = read_nth_column(0, filename)
    x_train = np.array(x, dtype=float).reshape(-1, 1)  # 2-D feature matrix
    # print(x_train)
    y = read_nth_column(1, filename)
    y_train = np.array(y, dtype=float)
    # print(y_train)

    # Testing samples
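    # NOTE: read from the same file and columns as the training samples above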
    X_test = read_nth_column(0, filename)
    y_test = read_nth_column(1, filename)

    est_gp = SymbolicRegressor(population_size=5000,
                               generations=30,
                               p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1,
                               verbose=1,
                               parsimony_coefficient=0.01,
                               random_state=0,
                               function_set=('add', 'sub', 'mul', 'div', 'sin',
                                             'cos', 'sqrt', 'log'))
    est_gp.fit(x_train, y_train)

    print(est_gp._program)
Example #3
    def gp_model(self, gp_model):
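        """Build or store the underlying model(s): a single SymbolicRegressor
        for regression or binary problems, otherwise one regressor per label."""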

        if self.problem == 'regression' or len(self.labels) == 2:
            if gp_model is None:
                f_names = self.gp_hyper_parameters.get('feature_names')
                if f_names is None:
                    self.gp_hyper_parameters['feature_names'] = self.feature_names
                self._gp_model = SymbolicRegressor(**self.gp_hyper_parameters)
            else:
                self._gp_model = gp_model
        else:
            dict_gp_model = {}
            for i in self.labels:
                if gp_model is None:
                    f_names = self.gp_hyper_parameters.get('feature_names')
                    if f_names is None:
                        self.gp_hyper_parameters['feature_names'] = self.feature_names
                    dict_gp_model[i] = SymbolicRegressor(**self.gp_hyper_parameters)
                else:
                    dict_gp_model[i] = gp_model
            self._gp_model = dict_gp_model
Example #4
def test_customized_regressor_metrics():
    """Check whether greater_is_better works for SymbolicRegressor."""

    x_data = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_true = x_data[:, 0] ** 2 + x_data[:, 1] ** 2

    est_gp = SymbolicRegressor(metric='mean absolute error',
                               stopping_criteria=0.000001, random_state=415,
                               parsimony_coefficient=0.001, init_method='full',
                               init_depth=(2, 4))
    est_gp.fit(x_data, y_true)
    formula = est_gp.__str__()
    assert_equal('add(mul(X1, X1), mul(X0, X0))', formula)

    def neg_mean_absolute_error(y, y_pred, sample_weight):
        return -mean_absolute_error(y, y_pred, sample_weight=sample_weight)

    customized_fitness = make_fitness(neg_mean_absolute_error,
                                      greater_is_better=True)

    c_est_gp = SymbolicRegressor(metric=customized_fitness,
                                 stopping_criteria=-0.000001, random_state=415,
                                 parsimony_coefficient=0.001, verbose=0,
                                 init_method='full', init_depth=(2, 4))
    c_est_gp.fit(x_data, y_true)
    c_formula = c_est_gp.__str__()
    assert_equal('add(mul(X1, X1), mul(X0, X0))', c_formula)
Example #5
def test_verbose_output():
    """Check verbose=1 does not cause error"""

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    true_header = '%4s|%-25s|%-42s|' % (' ', 'Population Average'.center(25),
                                        'Best Individual'.center(42))
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()
    header_fields = ('Gen', 'Length', 'Fitness', 'Length', 'Fitness',
                     'OOB Fitness', 'Time Left')
    true_header = '%4s %8s %16s %8s %16s %16s %10s' % header_fields
    assert_equal(true_header, header3)

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(10, n_lines)
Example #6
def test_verbose_output():
    """Check verbose=1 does not cause error"""

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    true_header = '    |{:^25}|{:^42}|'.format('Population Average',
                                               'Best Individual')
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()

    line_format = '{:>4} {:>8} {:>16} {:>8} {:>16} {:>16} {:>10}'
    true_header = line_format.format('Gen', 'Length', 'Fitness', 'Length',
                                     'Fitness', 'OOB Fitness', 'Time Left')
    assert_equal(true_header, header3)

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(20, n_lines)
Example #7
def symbolic_regressor(f, npoints, xrange):
    X = np.linspace(xrange[0], xrange[1], npoints).reshape((-1, 1))
    y = f(X)

    est_gp = SymbolicRegressor(population_size=5000,
                               generations=20, stopping_criteria=0.01,
                               p_crossover=0.7, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               max_samples=0.9, verbose=1,
                               parsimony_coefficient=0.01, random_state=0)

    est_gp.fit(X, y)

    sym_expr = str(est_gp._program)

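    # Map gplearn's primitive names to SymPy operations so sympify can parse the program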
    converter = {
        'sub': lambda x, y: x - y,
        'div': lambda x, y: x / y,
        'mul': lambda x, y: x * y,
        'add': lambda x, y: x + y,
        'neg': lambda x: -x,
        'pow': lambda x, y: x ** y
    }

    x, X0 = symbols('x X0')
    sym_reg = simplify(sympify(sym_expr, locals=converter))
    sym_reg = sym_reg.subs(X0, x)

    Y_true = y.reshape((-1, 1))
    Y_est = np.array([float(sym_reg.subs(x, float(X[k, 0])))
                      for k in range(len(X))]).reshape((-1, 1))

    R2_perf = compute_Rsquared(Y_true, Y_est)

    return sym_reg, R2_perf
Example #8
def test_verbose_output():
    """Check verbose=1 does not cause error"""

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    true_header = '%4s|%-25s|%-42s|' % (' ', 'Population Average'.center(25),
                                        'Best Individual'.center(42))
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()
    header_fields = ('Gen', 'Length', 'Fitness', 'Length', 'Fitness',
                     'OOB Fitness', 'Time Left')
    true_header = '%4s %8s %16s %8s %16s %16s %10s' % header_fields
    assert_equal(true_header, header3)

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(20, n_lines)
Example #9
def test_more_verbose_output():
    """Check verbose=2 does not cause error"""

    old_stdout = sys.stdout
    old_stderr = sys.stderr
    sys.stdout = StringIO()
    sys.stderr = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=2)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    joblib_output = sys.stderr
    sys.stdout = old_stdout
    sys.stderr = old_stderr

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    header2 = verbose_output.readline().rstrip()
    header3 = verbose_output.readline().rstrip()

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(20, n_lines)

    joblib_output.seek(0)
    n_lines = sum(1 for l in joblib_output.readlines())
    # Newer versions of joblib write their progress output to sys.stderr
    assert_equal(0, n_lines % 10)
Example #10
def getSymbolicRegressorModel():
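    """Fit a SymbolicRegressor on a synthetic quadratic target and return the
    best evolved program."""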
    rng = check_random_state(0)

    # Training samples
    X_train = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_train = X_train[:, 0]**2 - X_train[:, 1]**2 + X_train[:, 1] - 1

    # Testing samples
    X_test = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_test = X_test[:, 0]**2 - X_test[:, 1]**2 + X_test[:, 1] - 1

    est_gp = SymbolicRegressor(
        population_size=5000,
        generations=20,
        stopping_criteria=0.01,
        p_crossover=0.7,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.1,
        max_samples=0.9,
        verbose=1,
        parsimony_coefficient=0.01,
        random_state=0,
    )
    est_gp.fit(X_train, y_train)
    return est_gp._program
Example #11
def test_more_verbose_output():
    """Check verbose=2 does not cause error"""

    old_stdout = sys.stdout
    old_stderr = sys.stderr
    sys.stdout = StringIO()
    sys.stderr = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=2)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    joblib_output = sys.stderr
    sys.stdout = old_stdout
    sys.stderr = old_stderr

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    header2 = verbose_output.readline().rstrip()
    header3 = verbose_output.readline().rstrip()

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(10, n_lines)

    joblib_output.seek(0)
    n_lines = sum(1 for l in joblib_output.readlines())
    assert_equal(20, n_lines)
Example #12
def _gp_fit(arg):
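    # arg = (param, X, Y); param = (population_size, parsimony_coefficient,
    # space-separated function names, symmetric constant-range bound)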
    param = arg[0]
    X = arg[1]
    Y = arg[2]
    est_gp = SymbolicRegressor(
        population_size=param[0],
        generations=450,
        parsimony_coefficient=param[1],
        function_set=param[2].split(" "),
        const_range=(-param[3], param[3]),
    )

    training, validation = splitidx_srs(len(Y))
    X_train = X[training]
    Y_train = Y[training]
    X_validation = X[validation]
    Y_validation = Y[validation]

    try:
        est_gp.fit(X_train, Y_train)
        return (
            param,
            str(est_gp._program),
            est_gp._program.raw_fitness_,
            regression_measures(est_gp.predict(X_validation), Y_validation),
        )
    except Exception as e:
        return (param, "Exception: {}".format(str(e)), 999999999)
Example #13
def best_approximate(L, points):
	n = len(L)
	datax = [item[1] for item in L]
	datay = [item[0] for item in L]
	# Drop the first and last points
	datax = datax[1:-1]
	datay = datay[1:-1]
	print(datax)
	print(datay)
	X_train, X_test, y_train, y_test = train_test_split(datax, datay, test_size=0.33)  # pass random_state here for a reproducible split


	sr = SymbolicRegressor(
	    population_size=500,
	    generations=20,
	    stopping_criteria=0.01,  # stop once the best program's error (MAE by default) drops below this
	    function_set=('add', 'sub', 'mul', 'div'),  # functions the evolved programs may use
	    p_crossover=0.54,  # probabilities of the different genetic operators
	    p_subtree_mutation=0.1,
	    p_hoist_mutation=0.05,
	    p_point_mutation=0.3,
	    verbose=1,  # print progress to screen
	)

	# launch the evolution
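	# NOTE: fit expects a 2-D feature matrix; if datax holds scalars,
	# reshape first, e.g. np.reshape(X_train, (-1, 1))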
	sr.fit(X_train, y_train)
	Ypred = sr.predict(points)
	return Ypred
Example #14
def experiment(seed, i):
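    # pop_size, num_generations, crossover_prob, mutation_prob, function_set,
    # x, y and fit (presumably a custom metric from make_fitness) are globals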
    est_gp = SymbolicRegressor(population_size=pop_size,
                               generations=num_generations, stopping_criteria=0.01,
                               p_crossover=crossover_prob, p_subtree_mutation=mutation_prob,
                               p_hoist_mutation=mutation_prob, p_point_mutation=mutation_prob,
                               function_set=function_set,
                               max_samples=0.9, verbose=1,
                               metric=fit, random_state=seed)

    est_gp.fit(x, y)

    plt.figure(figsize=(14,5))
    plt.subplot(1,2,1)
    plt.xlabel('Generations', fontsize=24)
    plt.ylabel('Best fitness', fontsize=24)
    plt.plot(est_gp.run_details_['best_fitness'], linewidth=3.0)
    plt.grid()

    plt.subplot(1,2,2)
    plt.xlabel('Generations', fontsize=24)
    plt.ylabel('Best size', fontsize=24)
    plt.plot(est_gp.run_details_['best_length'], linewidth=3.0, color='red')
    plt.grid()

    plt.suptitle('Run {}'.format(i), fontsize=24)
    plt.savefig('plot_{}.eps'.format(seed))
    return est_gp.run_details_
Example #15
def train():
    est_gp = SymbolicRegressor(population_size=150,
                               generations=20, stopping_criteria=0.001,
                               p_crossover=0.8, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.05,
                               max_samples=0.9, verbose=1, metric='mean absolute error',
                               parsimony_coefficient=0.01)
    est_gp.fit(X_train, y_train)
    print(est_gp._program)
    print(est_gp.score(X_train, y_train))
Example #16
def test_none_const_range():
    """Check that const_range=None produces no constants"""

    # Check with None as const_range
    est = SymbolicRegressor(const_range=None, generations=2)
    est.fit(boston.data, boston.target)
    float_count = 0
    for generation in est._programs:
        for program in generation:
            if program is None:
                continue
            for element in program.program:
                if type(element) == float:
                    float_count += 1
    assert_true(float_count == 0)

    # Check with default const_range
    est = SymbolicRegressor(generations=2)
    est.fit(boston.data, boston.target)
    float_count = 0
    for generation in est._programs:
        for program in generation:
            if program is None:
                continue
            for element in program.program:
                if type(element) == float:
                    float_count += 1
    assert_true(float_count > 1)
Example #17
def test_none_const_range():
    """Check that const_range=None produces no constants"""

    # Check with None as const_range
    est = SymbolicRegressor(population_size=100, generations=2,
                            const_range=None)
    est.fit(boston.data, boston.target)
    float_count = 0
    for generation in est._programs:
        for program in generation:
            if program is None:
                continue
            for element in program.program:
                if isinstance(element, float):
                    float_count += 1
    assert float_count == 0

    # Check with default const_range
    est = SymbolicRegressor(population_size=100, generations=2)
    est.fit(boston.data, boston.target)
    float_count = 0
    for generation in est._programs:
        for program in generation:
            if program is None:
                continue
            for element in program.program:
                if isinstance(element, float):
                    float_count += 1
    assert float_count > 1
Example #18
def test_subsample():
    """Check that subsample work and that results differ"""

    est1 = SymbolicRegressor(max_samples=1.0, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    est1 = mean_absolute_error(est1.predict(boston.data[400:, :]),
                               boston.target[400:])

    est2 = SymbolicRegressor(max_samples=0.7, random_state=0)
    est2.fit(boston.data[:400, :], boston.target[:400])
    est2 = mean_absolute_error(est2.predict(boston.data[400:, :]),
                               boston.target[400:])

    assert_true(abs(est1 - est2) > 0.01)
Example #19
def test_early_stopping():
    """Check that early stopping works"""

    est1 = SymbolicRegressor(stopping_criteria=10, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    assert_true(len(est1._programs) == 1)

    est1 = SymbolicTransformer(stopping_criteria=0.5, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    assert_true(len(est1._programs) == 1)

    est1 = SymbolicClassifier(stopping_criteria=.9, random_state=0)
    est1.fit(cancer.data[:400, :], cancer.target[:400])
    assert_true(len(est1._programs) == 1)
Example #20
def train():
    est_gp = SymbolicRegressor(population_size=150,
                               generations=20,
                               stopping_criteria=0.001,
                               p_crossover=0.8,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.05,
                               max_samples=0.9,
                               verbose=1,
                               metric='mean absolute error',
                               parsimony_coefficient=0.01)
    est_gp.fit(X_train, y_train)
    print(est_gp._program)
    print(est_gp.score(X_train, y_train))
Example #21
def test_pipeline():
    """Check that SymbolicRegressor/Transformer can work in a pipeline"""

    # Check the regressor
    est = make_pipeline(StandardScaler(),
                        SymbolicRegressor(population_size=50,
                                          generations=5,
                                          tournament_size=5,
                                          random_state=0))
    est.fit(boston.data, boston.target)
    assert_almost_equal(est.score(boston.data, boston.target), -4.00270923)

    # Check the classifier
    est = make_pipeline(StandardScaler(),
                        SymbolicClassifier(population_size=50,
                                           generations=5,
                                           tournament_size=5,
                                           random_state=0))
    est.fit(cancer.data, cancer.target)
    assert_almost_equal(est.score(cancer.data, cancer.target), 0.934973637961)

    # Check the transformer
    est = make_pipeline(SymbolicTransformer(population_size=50,
                                            hall_of_fame=20,
                                            generations=5,
                                            tournament_size=5,
                                            random_state=0),
                        DecisionTreeRegressor())
    est.fit(boston.data, boston.target)
    assert_almost_equal(est.score(boston.data, boston.target), 1.0)
Example #22
def test_parsimony_coefficient():
    """Check that parsimony coefficients work and that results differ"""

    est1 = SymbolicRegressor(population_size=100, generations=2,
                             parsimony_coefficient=0.001, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    est1 = mean_absolute_error(est1.predict(boston.data[400:, :]),
                               boston.target[400:])

    est2 = SymbolicRegressor(population_size=100, generations=2,
                             parsimony_coefficient='auto', random_state=0)
    est2.fit(boston.data[:400, :], boston.target[:400])
    est2 = mean_absolute_error(est2.predict(boston.data[400:, :]),
                               boston.target[400:])

    assert abs(est1 - est2) > 0.01
Example #23
def test_trigonometric():
    """Check that using trig functions work and that results differ"""

    est1 = SymbolicRegressor(random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    est1 = mean_absolute_error(est1.predict(boston.data[400:, :]),
                               boston.target[400:])

    est2 = SymbolicRegressor(
        function_set=['add', 'sub', 'mul', 'div', 'sin', 'cos', 'tan'],
        random_state=0)
    est2.fit(boston.data[:400, :], boston.target[:400])
    est2 = mean_absolute_error(est2.predict(boston.data[400:, :]),
                               boston.target[400:])

    assert_true(abs(est1 - est2) > 0.01)
Example #24
def test_parallel_train():
    """Check predictions are the same for different n_jobs"""

    # Check the regressor
    ests = [
        SymbolicRegressor(population_size=100, generations=4, n_jobs=n_jobs,
                          random_state=0).fit(boston.data[:100, :],
                                              boston.target[:100])
        for n_jobs in [1, 2, 3, 8, 16]
    ]

    preds = [e.predict(boston.data[500:, :]) for e in ests]
    for pred1, pred2 in zip(preds, preds[1:]):
        assert_array_almost_equal(pred1, pred2)
    lengths = np.array([[gp.length_ for gp in e._programs[-1]] for e in ests])
    for len1, len2 in zip(lengths, lengths[1:]):
        assert_array_almost_equal(len1, len2)

    # Check the transformer
    ests = [
        SymbolicTransformer(population_size=100, hall_of_fame=50,
                            generations=4, n_jobs=n_jobs,
                            random_state=0).fit(boston.data[:100, :],
                                                boston.target[:100])
        for n_jobs in [1, 2, 3, 8, 16]
    ]

    preds = [e.transform(boston.data[500:, :]) for e in ests]
    for pred1, pred2 in zip(preds, preds[1:]):
        assert_array_almost_equal(pred1, pred2)
    lengths = np.array([[gp.length_ for gp in e._programs[-1]] for e in ests])
    for len1, len2 in zip(lengths, lengths[1:]):
        assert_array_almost_equal(len1, len2)
Example #25
def runProgram(X_train, y_train, w):
    est_gp = SymbolicRegressor(population_size=5000,
                               generations=25, stopping_criteria=0.01,
                               p_crossover=0.65, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               max_samples=0.9, verbose=1,
                               function_set=['add', 'sub', 'mul', 'div', 'sqrt'],
                               parsimony_coefficient=0.01, random_state=1,
                               init_depth=(3, 6), tournament_size=10,
                               metric='mean absolute error')
    # The original built the estimator without fitting or returning it;
    # `w` presumably carries per-sample weights.
    est_gp.fit(X_train, y_train, sample_weight=w)
    return est_gp
Example #26
def pca_gp(rows, features, function):
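    """Run NUMBER_OF_RUNS GP fits on PCA-reduced random data and collect the
    best raw fitness per generation of each run."""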
    run_results = {}
    for run_number in range(0, NUMBER_OF_RUNS):
        # Generating random data
        rng = check_random_state(run_number)
        X = rng.uniform(-1, 1, rows).reshape(rows // features, features)
        Y = function(X)

        # Dividing it into training and test set
        X_train, X_test, y_train, y_test = train_test_split(
            X, Y, test_size=TEST_SIZE, random_state=0)

        # Convert it to PCA
        pca = PCA(n_components=1)
        X_train_pca = pca.fit_transform(X_train)
        X_test_pca = pca.transform(X_test)

        # Training the system
        est_gp = SymbolicRegressor(population_size=POPULATION_SIZE,
                                   generations=NUMBER_OF_GENERATION,
                                   stopping_criteria=0.01,
                                   p_crossover=0.7,
                                   p_subtree_mutation=0.1,
                                   p_hoist_mutation=0.05,
                                   p_point_mutation=0.1,
                                   max_samples=0.9,
                                   verbose=1,
                                   parsimony_coefficient=0.01,
                                   random_state=0,
                                   n_jobs=-1)
        est_gp.fit(X_train_pca, y_train)
        generation_results = []
        for idGen in range(len(est_gp._programs)):
            single_generation = {idGen: math.inf}
            for idPopulation in range(est_gp.population_size):
                program = est_gp._programs[idGen][idPopulation]
                if (program is not None and
                        program.raw_fitness_ < single_generation[idGen]):
                    single_generation[idGen] = program.raw_fitness_
            generation_results.append(single_generation)
        run_results[run_number] = generation_results
    return run_results
Example #27
def test_verbose_with_oob():
    """Check oob scoring for subsample does not cause error"""

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(max_samples=0.9, random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    header2 = verbose_output.readline().rstrip()
    header3 = verbose_output.readline().rstrip()

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(10, n_lines)
Example #28
def main():
    data = read_data()
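    # const_range=(0.0, 0.0) makes every ephemeral constant exactly 0.0,
    # so the evolved expressions are effectively constant-free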
    regressor = SymbolicRegressor(population_size=1000,
                                  generations=100,
                                  const_range=(.0, .0),
                                  init_depth=(2, 10),
                                  init_method='grow',
                                  function_set=('add', 'sub', 'mul', 'div',
                                                'log', 'sin', 'cos'),
                                  p_crossover=0.7,
                                  p_subtree_mutation=0.0,
                                  p_hoist_mutation=0.0,
                                  p_point_mutation=0.0,
                                  verbose=1,
                                  n_jobs=-1)
    (n, _) = data.shape
    regressor.fit(data[:, 0].reshape(n, 1), data[:, 1])
    print(regressor._program)
Example #29
    def fit(self, x_data):
        est_gp = SymbolicRegressor(population_size=500,
                                   generations=10,
                                   stopping_criteria=0.0001,
                                   p_crossover=0.7,
                                   p_subtree_mutation=0.1,
                                   p_hoist_mutation=0.05,
                                   p_point_mutation=0.1,
                                   metric=make_fitness(
                                       function=self.make_explict_func(),
                                       greater_is_better=False),
                                   function_set=self.function_set,
                                   verbose=1,
                                   parsimony_coefficient=0.01)

        # The targets passed to fit are just row indices; presumably the
        # custom metric uses them to look up the actual samples.
        indices = np.arange(x_data.shape[0])
        est_gp.fit(x_data, indices)

        return est_gp
Example #30
def test_verbose_with_oob():
    """Check oob scoring for subsample does not cause error"""

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(max_samples=0.9, random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    header2 = verbose_output.readline().rstrip()
    header3 = verbose_output.readline().rstrip()

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(20, n_lines)
Example #31
def test_gridsearch():
    """Check that SymbolicRegressor can be grid-searched"""

    # Grid search parsimony_coefficient
    parameters = {'parsimony_coefficient': [0.001, 0.1, 'auto']}
    clf = SymbolicRegressor(population_size=50, generations=5,
                            tournament_size=5, random_state=0)
    grid = GridSearchCV(clf, parameters, scoring='neg_mean_absolute_error')
    grid.fit(boston.data, boston.target)
    expected = {'parsimony_coefficient': 0.001}
    assert_equal(grid.best_params_, expected)
Example #32
def result(train_data: tuple, test_data: tuple, verbose: bool):
    if verbose:
        print(">>>>>>>>>>>>>>>>>>>>>>>EVOLUTIONIST<<<<<<<<<<<<<<<<<<<<<<<<<<")

    train_atts, train_targets = train_data
    test_atts, test_targets = test_data

    clf = SymbolicRegressor(verbose=verbose)

    y_predict = clf.fit(train_atts, train_targets).predict(test_atts)

    i = 0
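    # Count exact matches; exact float equality is a very strict criterion
    # for regression predictions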
    for predict, target in zip(y_predict, test_targets):
        if predict == target:
            i += 1

    if verbose:
        print(F"\tFINAL PROGRAM: {clf._program}")

    return i
Example #33
def test_verbose_with_oob():
    """Check oob scoring for subsample does not cause error"""

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(population_size=100, generations=10,
                            max_samples=0.9, random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    # Ignore header rows
    _ = verbose_output.readline().rstrip()
    _ = verbose_output.readline().rstrip()
    _ = verbose_output.readline().rstrip()

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(10, n_lines)
Example #34
def regressionOfFailureRate(coords,
                            seed=None,
                            population_size=None,
                            generations=None):
    """
	Pokusí se co nejlépe proložit body \a coords vyjadřující četnost chyb.
	Snaží se při tom aby výsledek byl integrovatelný, ovšem integrovatelnost nezaručuje.
	"""
    if population_size is None:
        population_size = 1000
    if generations is None:
        generations = 20

    # Split the x and y coordinates into the shape gplearn expects
    X_train, y_train = zip(*(([x], y) for (x, y) in coords))

    from gplearn.genetic import SymbolicRegressor
    # How many random numbers will gplearn generate? It is not bounded: at each
    # node either a function or a variable is inserted, or a random number is
    # drawn from the given interval.
    est_gp = SymbolicRegressor(  # Estimator Genetic Programming
        population_size=population_size,
        generations=1,
        tournament_size=20,
        stopping_criteria=0.0,
        const_range=(0.0, 5.0),
        init_depth=(2, 6),
        init_method='half and half',
        function_set=('add', 'mul'),
        metric='mean absolute error',  #metric=sum_absolute_error
        parsimony_coefficient=0.001,
        p_crossover=0.9,
        p_subtree_mutation=0.01,
        p_hoist_mutation=0.01,
        p_point_mutation=0.01,
        p_point_replace=0.05,
        max_samples=1.0,
        warm_start=False,
        n_jobs=-1,
        verbose=VERBOSITY,
        random_state=seed)
    est_gp.fit(X_train, y_train)
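    # The population is evolved one generation at a time with warm_start=True
    # so that it can be patched between generations (below)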
    for p in est_gp._programs[0]:
        p.program[0] = gplearn.functions.div2  # rewrite every root node to division
    for i in range(1, generations):
        for p in est_gp._programs[i - 1]:
            p.get_subtree = functools.partial(
                get_subtree, p)  # forbid crossover at the root for all offspring
        est_gp.set_params(generations=i + 1, warm_start=True)
        est_gp.fit(X_train, y_train)
    best_individual = est_gp._program
    return est_gp, extractExprFromGplearn(best_individual.program)
Example #35
def test_trigonometric():
    """Check that using trig functions work and that results differ"""

    est1 = SymbolicRegressor(random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    est1 = mean_absolute_error(est1.predict(boston.data[400:, :]),
                               boston.target[400:])

    est2 = SymbolicRegressor(trigonometric=True, random_state=0)  # legacy gplearn (<0.2) flag; newer releases use function_set
    est2.fit(boston.data[:400, :], boston.target[:400])
    est2 = mean_absolute_error(est2.predict(boston.data[400:, :]),
                               boston.target[400:])

    assert_true(abs(est1 - est2) > 0.01)
Example #36
def test_subsample():
    """Check that subsample work and that results differ"""

    est1 = SymbolicRegressor(max_samples=1.0, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    est1 = mean_absolute_error(est1.predict(boston.data[400:, :]),
                               boston.target[400:])

    est2 = SymbolicRegressor(max_samples=0.7, random_state=0)
    est2.fit(boston.data[:400, :], boston.target[:400])
    est2 = mean_absolute_error(est2.predict(boston.data[400:, :]),
                               boston.target[400:])

    assert_true(abs(est1 - est2) > 0.01)
Example #37
def test_early_stopping():
    """Check that early stopping works"""

    est1 = SymbolicRegressor(stopping_criteria=10, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    assert_true(len(est1._programs) == 1)

    est1 = SymbolicTransformer(stopping_criteria=0.5, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    assert_true(len(est1._programs) == 1)
Example #38
def test_input_shape():
    """Check changed dimensions cause failure"""

    random_state = check_random_state(415)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)
    X2 = np.reshape(random_state.uniform(size=45), (5, 9))
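    # X2 has one fewer feature than X, so predict/transform must reject it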

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)
    est.fit(X, y)
    assert_raises(ValueError, est.predict, X2)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)
    est.fit(X, y)
    assert_raises(ValueError, est.transform, X2)
Example #39
def test_pickle():
    """Check pickability"""

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    score = est.score(boston.data[500:, :], boston.target[500:])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    score2 = est2.score(boston.data[500:, :], boston.target[500:])
    assert_equal(score, score2)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    X_new = est.transform(boston.data[500:, :])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    X_new2 = est2.transform(boston.data[500:, :])
    assert_array_almost_equal(X_new, X_new2)
Example #40
def main():
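    # escalation: presumably the number of athletes to select per position
    # when building the suggested line-up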
    escalation = {
        Position.GOALKEEPER: 1,
        Position.DEFENDER: 2,
        Position.SIDE: 2,
        Position.MIDFIELD: 4,
        Position.ATTACKER: 2,
        Position.COACH: 1
    }

    print("Getting auth")
    auth = get_auth()

    print("Getting teams")
    teams = get_teams()

    print("Getting athletes")
    athletes = get_athletes(teams)

    print("Getting scores")
    scores = [athlete.get_row(auth) for athlete in athletes]
    max_length = 0
    for score in scores:
        if len(score) > max_length:
            max_length = len(score)
    fixed_score = []
    for score in scores:
        fixed_score.append([0.0] * (max_length - len(score)) + score)

    generations = 2000
    print("Training using " + str(generations) + " generations. It can take a long time to end")
    est_gp = SymbolicRegressor(
        population_size=5000,
        generations=generations,
        stopping_criteria=0.01,
        p_crossover=0.7,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.1,
        max_samples=0.9,
        verbose=1,
        parsimony_coefficient=0.01,
        random_state=0,
        const_range=(-50., 50.),
        function_set=(
            'add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv', 'max', 'min', 'sin', 'cos', 'tan'))
    est_gp.fit([x[:-1] for x in fixed_score], [x[-1] for x in fixed_score])
    predictions = est_gp.predict([x[:-1] for x in fixed_score])

    print("Getting results")
    results = [[athlete, prediction] for athlete, prediction in zip(athletes, predictions)]
    results.sort(key=lambda x: -x[1])
    print("\"Scale\",\"Name\",\"Team\",\"Position\",\"Status\",\"Price\",\"Prediction\"")
    for result in results:
        athlete = result[0]
        prediction = result[1]
        scale = athlete.status == Status.Probable and escalation[athlete.position] > 0
        if scale:
            escalation[athlete.position] = escalation[athlete.position] - 1
        print("\"" +
              ("*" if scale else " ") + "\",\"" +
              athlete.nick + "\",\"" +
              athlete.club.name + "\",\"" +
              str(athlete.position.name) + "\",\"" +
              str(athlete.status.name) + "\"," +
              str(athlete.price) + "," +
              str(prediction))

    print("Done")
Example #41
def test_sample_weight():
    """Check sample_weight param works"""

    # Check constant sample_weight has no effect
    sample_weight = np.ones(boston.target.shape[0])
    est1 = SymbolicRegressor(generations=2, random_state=0)
    est1.fit(boston.data, boston.target)
    est2 = SymbolicRegressor(generations=2, random_state=0)
    est2.fit(boston.data, boston.target, sample_weight=sample_weight)
    # And again with a scaled sample_weight
    est3 = SymbolicRegressor(generations=2, random_state=0)
    est3.fit(boston.data, boston.target, sample_weight=sample_weight * 1.1)

    assert_almost_equal(est1._program.fitness_, est2._program.fitness_)
    assert_almost_equal(est1._program.fitness_, est3._program.fitness_)

    # And again for the transformer
    sample_weight = np.ones(boston.target.shape[0])
    est1 = SymbolicTransformer(generations=2, random_state=0)
    est1 = est1.fit_transform(boston.data, boston.target)
    est2 = SymbolicTransformer(generations=2, random_state=0)
    est2 = est2.fit_transform(boston.data, boston.target,
                              sample_weight=sample_weight)
    # And again with a scaled sample_weight
    est3 = SymbolicTransformer(generations=2, random_state=0)
    est3 = est3.fit_transform(boston.data, boston.target,
                              sample_weight=sample_weight * 1.1)

    assert_array_almost_equal(est1, est2)
    assert_array_almost_equal(est1, est3)
Example #42
def test_parsimony_coefficient():
    """Check that parsimony coefficients work and that results differ"""

    est1 = SymbolicRegressor(parsimony_coefficient=0.001, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    est1 = mean_absolute_error(est1.predict(boston.data[400:, :]),
                               boston.target[400:])

    est2 = SymbolicRegressor(parsimony_coefficient=0.1, random_state=0)
    est2.fit(boston.data[:400, :], boston.target[:400])
    est2 = mean_absolute_error(est2.predict(boston.data[400:, :]),
                               boston.target[400:])

    est3 = SymbolicRegressor(parsimony_coefficient='auto', random_state=0)
    est3.fit(boston.data[:400, :], boston.target[:400])
    est3 = mean_absolute_error(est3.predict(boston.data[400:, :]),
                               boston.target[400:])

    assert_true(abs(est1 - est2) > 0.01)
    assert_true(abs(est1 - est3) > 0.01)
    assert_true(abs(est2 - est3) > 0.01)
Example #43
def test_print_overloading_estimator():
    """Check that printing a fitted estimator results in 'pretty' output"""

    random_state = check_random_state(415)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)

    # Unfitted
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_unfitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    # Fitted
    est.fit(X, y)
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_fitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est._program)
        output_program = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    assert_true(output_unfitted != output_fitted)
    assert_true(output_unfitted == est.__repr__())
    assert_true(output_fitted == output_program)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)

    # Unfitted
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_unfitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    # Fitted
    est.fit(X, y)
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_fitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        output = str([gp.__str__() for gp in est])
        print(output.replace("',", ",\n").replace("'", ""))
        output_program = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    assert_true(output_unfitted != output_fitted)
    assert_true(output_unfitted == est.__repr__())
    assert_true(output_fitted == output_program)