Example #1
0
def genetic_feat(df, num_gen=20, num_comp=10):
    """Append genetically-programmed features to *df*.

    Fits a gplearn SymbolicTransformer on every column except ``close``
    (using ``close`` as the target) and concatenates the evolved
    components as new ``gen_<i>`` columns.

    Args:
        df (pd.DataFrame): input frame; must contain a ``close`` column.
        num_gen (int): number of GP generations to evolve.
        num_comp (int): number of transformed components to keep.

    Returns:
        pd.DataFrame: *df* with ``num_comp`` extra ``gen_<i>`` columns.
    """
    from gplearn.genetic import SymbolicTransformer

    function_set = [
        'add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv', 'tan'
    ]

    gp = SymbolicTransformer(generations=num_gen,
                             population_size=200,
                             hall_of_fame=100,
                             n_components=num_comp,
                             function_set=function_set,
                             parsimony_coefficient=0.0005,
                             max_samples=0.9,
                             verbose=1,
                             random_state=0,
                             n_jobs=6)

    gen_feats = gp.fit_transform(df.drop("close", axis=1), df["close"])
    # NOTE: removed a dead no-op statement here (`df.iloc[:, :8]`) — its
    # result was discarded and it had no effect.
    gen_feats = pd.DataFrame(
        gen_feats,
        columns=["gen_" + str(a) for a in range(gen_feats.shape[1])])
    gen_feats.index = df.index
    return pd.concat((df, gen_feats), axis=1)
Example #2
0
def test_symbolic_transformer():
    """Check that SymbolicTransformer example works"""

    rng = check_random_state(0)
    boston = load_boston()
    order = rng.permutation(boston.target.size)
    boston.data, boston.target = boston.data[order], boston.target[order]

    # Baseline: Ridge on the raw features alone.
    baseline = Ridge()
    baseline.fit(boston.data[:300, :], boston.target[:300])
    assert_almost_equal(
        baseline.score(boston.data[300:, :], boston.target[300:]),
        0.759319453049884)

    gp = SymbolicTransformer(generations=20, population_size=2000,
                             hall_of_fame=100, n_components=10,
                             function_set=['add', 'sub', 'mul', 'div',
                                           'sqrt', 'log', 'abs', 'neg',
                                           'inv', 'max', 'min'],
                             parsimony_coefficient=0.0005,
                             max_samples=0.9,
                             random_state=0)
    gp.fit(boston.data[:300, :], boston.target[:300])

    # Augment the raw features with the evolved components.
    augmented = np.hstack((boston.data, gp.transform(boston.data)))

    # The augmented model must reproduce the documented improved score.
    improved = Ridge()
    improved.fit(augmented[:300, :], boston.target[:300])
    assert_almost_equal(
        improved.score(augmented[300:, :], boston.target[300:]),
        0.8418372105182055)
Example #3
0
def test_transformer_iterable():
    """Check that the transformer is iterable"""

    rng = check_random_state(415)
    X = np.reshape(rng.uniform(size=50), (5, 10))
    y = rng.uniform(size=5)
    est = SymbolicTransformer(generations=2, random_state=0)

    # Before fitting the transformer exposes no programs.
    assert_true(len(est) == 0)
    assert_true([prog.length_ for prog in est] == [])

    # After fitting, iteration yields the selected programs with the
    # documented lengths.
    est.fit(X, y)
    assert_true(len(est) == 10)
    assert_true([prog.length_ for prog in est] ==
                [15, 19, 19, 12, 9, 10, 7, 14, 6, 21])

    # Indexing past the last program raises IndexError.
    assert_raises(IndexError, est.__getitem__, 10)
Example #4
0
def pd_colcat_symbolic(df, col, pars):
    """Generate genetic-programming features with gplearn's SymbolicTransformer.

       https://github.com/arita37/deltapy

       pip install deltapy

    df:   input DataFrame containing the `col` features and target `pars['coly']`.
    col:  list of feature column names to transform.
    pars: dict with at least 'coly'; optional 'path_pipeline_export' /
          'path_features_store' enable loading/saving pipeline artifacts.

    Returns (dfnew, col_pars): the generated `gen_*` feature frame and a
    dict holding the fitted model plus the new column names.
    """
    pars_encoder = pars
    pars_encoder['cols'] = col
    if 'path_pipeline_export' in pars:
        # Best effort: reuse previously exported pars when present.
        # (The original also loaded the model and column list into unused
        # locals; those dead loads were removed.)
        try:
            pars_encoder = load(pars['path_pipeline_export'] +
                                '/col_genetic_pars.pkl')
        except Exception:
            pass  # narrowed from a bare `except:`; still best-effort

    ###################################################################################
    coly = pars['coly']
    from gplearn.genetic import SymbolicTransformer
    function_set = [
        'add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv', 'tan'
    ]

    gp = SymbolicTransformer(generations=20,
                             population_size=200,
                             hall_of_fame=100,
                             n_components=10,
                             function_set=function_set,
                             parsimony_coefficient=0.0005,
                             max_samples=0.9,
                             verbose=1,
                             random_state=0,
                             n_jobs=6)

    gen_feats = gp.fit_transform(df[col], df[coly])
    gen_feats = pd.DataFrame(
        gen_feats,
        columns=["gen_" + str(a) for a in range(gen_feats.shape[1])])
    gen_feats.index = df.index
    dfnew = gen_feats
    # (removed a no-op `dfnew.columns = [t for t in dfnew.columns]`)

    ###################################################################################
    colnew = list(dfnew.columns)
    if 'path_features_store' in pars and 'path_pipeline_export' in pars:
        save_features(dfnew, 'dfgen', pars['path_features_store'])
        save(gp, pars['path_pipeline_export'] + "/col_genetic_model.pkl")
        save(pars_encoder,
             pars['path_pipeline_export'] + "/col_genetic_pars.pkl")
        save(colnew, pars['path_pipeline_export'] + "/col_genetic.pkl")

    col_pars = {'model': gp}
    col_pars['cols_new'] = {
        'col_genetic': colnew  ### list
    }
    return dfnew, col_pars
Example #5
0
def pd_col_genetic_transform(df=None, col=None, pars=None):
    """Build genetic-programming features for train/test/validation splits.

    NOTE(review): the parameters ``df``, ``col`` and ``pars`` are never
    used; the body reads module-level globals ``train_X``, ``train_y``,
    ``test_X`` and ``val_X`` instead — confirm this is intentional.

    Returns (train_X_all, test_X_all, val_X_all): each split with the
    generated ``gen_*`` columns concatenated.
    """
    num_gen=20
    num_comp=10
    function_set = ['add', 'sub', 'mul', 'div',
           'sqrt', 'log', 'abs', 'neg', 'inv','tan']

    gp = SymbolicTransformer(generations=num_gen, population_size=200,
    hall_of_fame=100, n_components=num_comp,
    function_set=function_set,
    parsimony_coefficient=0.0005,
    max_samples=0.9, verbose=1,
    random_state=0, n_jobs=6)

    # Fit on the training split only; reuse the fitted transformer for
    # test and validation so all three splits share the same formulae.
    gen_feats = gp.fit_transform(train_X, train_y)
    gen_feats = pd.DataFrame(gen_feats, columns=["gen_"+str(a) for a in range(gen_feats.shape[1])])
    gen_feats.index = train_X.index
    train_X_all=pd.concat((train_X,gen_feats),axis=1)
    gen_feats = gp.transform(test_X)
    gen_feats = pd.DataFrame(gen_feats, columns=["gen_"+str(a) for a in range(gen_feats.shape[1])])
    gen_feats.index = test_X.index
    test_X_all=pd.concat((test_X,gen_feats),axis=1)

    gen_feats = gp.transform(val_X)
    gen_feats = pd.DataFrame(gen_feats, columns=["gen_"+str(a) for a in range(gen_feats.shape[1])])
    gen_feats.index = val_X.index
    val_X_all=pd.concat((val_X,gen_feats),axis=1)
    return train_X_all,test_X_all,val_X_all
Example #6
0
def symbolic_transformer(X, y, encoder=None):
    """Derive new features from *X* via genetic symbolic transformation.

    Args:
        X (DataFrame): Independent features
        y (Series): Dependent feature or target
        encoder (obj, optional): A pre-fitted 'SymbolicTransformer'. When
            None, a new one is created and fitted on (X, y).

    Returns:
        tuple: (transformed feature matrix, the fitted encoder)
    """
    if encoder is None:
        ops = ['add', 'sub', 'mul', 'div', 'sqrt', 'log',
               'abs', 'neg', 'inv', 'max', 'min']
        encoder = SymbolicTransformer(generations=10,
                                      population_size=1000,
                                      hall_of_fame=100,
                                      n_components=12,
                                      function_set=ops,
                                      parsimony_coefficient=0.0005,
                                      max_samples=0.9,
                                      verbose=1,
                                      random_state=123,
                                      n_jobs=-1)
        encoder.fit(X, y)
    return encoder.transform(X), encoder
Example #7
0
def test_sample_weight():
    """Check sample_weight param works"""

    # A constant weight vector must not change the fitted program.
    weights = np.ones(boston.target.shape[0])
    base = SymbolicRegressor(generations=2, random_state=0)
    base.fit(boston.data, boston.target)
    weighted = SymbolicRegressor(generations=2, random_state=0)
    weighted.fit(boston.data, boston.target, sample_weight=weights)
    # Uniformly scaling the weights must not change it either.
    scaled = SymbolicRegressor(generations=2, random_state=0)
    scaled.fit(boston.data, boston.target, sample_weight=weights * 1.1)

    assert_almost_equal(base._program.fitness_, weighted._program.fitness_)
    assert_almost_equal(base._program.fitness_, scaled._program.fitness_)

    # Same invariance for the transformer's fit_transform output.
    weights = np.ones(boston.target.shape[0])
    out_plain = SymbolicTransformer(generations=2,
                                    random_state=0).fit_transform(
                                        boston.data, boston.target)
    out_weighted = SymbolicTransformer(generations=2,
                                       random_state=0).fit_transform(
                                           boston.data,
                                           boston.target,
                                           sample_weight=weights)

    assert_array_almost_equal(out_plain, out_weighted)
def Genetic_P(dataset, target):
    """Create genetic-programming features and append them to *dataset*.

    Drops the target and the 'mean_per_hour' column from the feature
    matrix, evolves 15 components, and returns *dataset* with the new
    columns concatenated (rows with NaN dropped).
    """
    append = 'mean_per_hour'
    hourly = dataset[append]  # read kept for parity (raises if missing)
    y = dataset[target]
    X = dataset.drop(target, axis=1).drop(append, axis=1)
    function_set = [
        'add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv', 'max',
        'min', 'sin', 'cos', 'tan'
    ]
    gp = SymbolicTransformer(generations=20, population_size=2000,
                             hall_of_fame=100, n_components=15,
                             function_set=function_set,
                             parsimony_coefficient=0.0005,
                             max_samples=0.9, verbose=1,
                             random_state=random_seed, n_jobs=3)
    gp_features = gp.fit_transform(X, y)
    print('Number of features created out of genetic programing: {}'.format(
        gp_features.shape))
    extra = pd.DataFrame(gp_features).set_index(dataset.index.values)
    return pd.concat([dataset, extra], axis=1).dropna()
Example #9
0
def test_transformer_iterable():
    """Check that the transformer is iterable"""

    rng = check_random_state(415)
    X = np.reshape(rng.uniform(size=50), (5, 10))
    y = rng.uniform(size=5)
    ops = [
        'add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv', 'max',
        'min'
    ]
    est = SymbolicTransformer(population_size=500,
                              generations=2,
                              function_set=ops,
                              random_state=0)

    # Unfitted: no programs to iterate over.
    assert_true(len(est) == 0)
    assert_true([prog.length_ for prog in est] == [])

    # Fitted: iteration yields the selected programs with known lengths.
    est.fit(X, y)
    assert_true(len(est) == 10)
    assert_true([prog.length_ for prog in est] ==
                [8, 12, 2, 29, 9, 33, 9, 8, 4, 22])

    # Out-of-range indexing raises IndexError.
    assert_raises(IndexError, est.__getitem__, 10)
Example #10
0
def test_output_shape():
    """Check output shape is as expected"""

    rng = check_random_state(415)
    X = rng.uniform(size=50).reshape(5, 10)
    y = rng.uniform(size=5)

    # The transformer must emit exactly n_components columns.
    trans = SymbolicTransformer(n_components=5, generations=2, random_state=0)
    trans.fit(X, y)
    assert_true(trans.transform(X).shape == (5, 5))
Example #11
0
def test_output_shape():
    """Check output shape is as expected"""

    rng = check_random_state(415)
    n_rows, n_cols = 5, 10
    X = np.reshape(rng.uniform(size=n_rows * n_cols), (n_rows, n_cols))
    y = rng.uniform(size=n_rows)

    # Transformed output has one column per requested component.
    trans = SymbolicTransformer(n_components=5, generations=2, random_state=0)
    trans.fit(X, y)
    assert_true(trans.transform(X).shape == (n_rows, 5))
Example #12
0
class GplearnDemo(object):
    """Demo comparing Ridge regression on the Boston housing data with and
    without features generated by a gplearn SymbolicTransformer."""
    def __init__(self):
        # data prepare
        self.__boston = None  # raw sklearn Bunch
        self.__boston_feature = None  # features as a DataFrame
        self.__boston_label = None  # target as a Series
        self.__train_feature, self.__test_feature = [None for _ in range(2)]
        self.__train_label, self.__test_label = [None for _ in range(2)]
        self.__transformer = None  # fitted SymbolicTransformer
        self.__gp_train_feature = None  # GP features for the train split
        self.__gp_test_feature = None  # GP features for the test split

        # model fit
        self.__regressor = None

    def data_prepare(self):
        """Load the data, split it 50/50, fit the transformer on the train
        half and cache the transformed features for both halves."""
        self.__boston = load_boston()
        self.__boston_feature = pd.DataFrame(
            self.__boston.data, columns=self.__boston.feature_names)
        self.__boston_label = pd.Series(
            self.__boston.target).to_frame("TARGET").squeeze()

        self.__train_feature, self.__test_feature, self.__train_label, self.__test_label = (
            train_test_split(self.__boston_feature,
                             self.__boston_label,
                             test_size=0.5,
                             shuffle=True))

        # No missing values allowed (the transformer cannot handle NaNs).
        self.__transformer = SymbolicTransformer(n_jobs=4)
        self.__transformer.fit(self.__train_feature, self.__train_label)
        self.__gp_train_feature = self.__transformer.transform(
            self.__train_feature)
        self.__gp_test_feature = self.__transformer.transform(
            self.__test_feature)

    def model_fit_predict(self):
        """Print test MSE for Ridge on the raw features, then on the raw
        features augmented with the GP features, for comparison."""
        self.__regressor = Ridge()
        self.__regressor.fit(self.__train_feature, self.__train_label)
        print(
            mean_squared_error(self.__test_label,
                               self.__regressor.predict(self.__test_feature)))

        self.__regressor = Ridge()
        self.__regressor.fit(
            np.hstack((self.__train_feature.values, self.__gp_train_feature)),
            self.__train_label)
        print(
            mean_squared_error(
                self.__test_label,
                self.__regressor.predict(
                    np.hstack((self.__test_feature.values,
                               self.__gp_test_feature)))))
Example #13
0
def pd_col_genetic_transform(df=None, col=None, pars=None):
    """Derive symbolic (genetic-programming) formula features.

    Fits a gplearn SymbolicTransformer on the columns in *col* (minus the
    target ``pars['coly']``) and returns the generated feature frame plus
    a dict with the fitted model, its parameters and the new column names.
    """
    prefix = 'col_genetic'
    ######################################################################################
    from gplearn.genetic import SymbolicTransformer
    coly = pars['coly']
    colX = [name for name in col if name not in [coly]]
    train_X, train_y = df[colX], df[coly]

    function_set = [
        'add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv', 'tan'
    ]
    pars_genetic = pars.get('pars_genetic', {
        'generations': 20,
        'n_components': 10,
        'population_size': 200
    })

    gp = SymbolicTransformer(hall_of_fame=100,
                             function_set=function_set,
                             parsimony_coefficient=0.0005,
                             max_samples=0.9,
                             verbose=1,
                             random_state=0,
                             n_jobs=6,
                             **pars_genetic)

    gp.fit(train_X, train_y)
    transformed = gp.transform(train_X)
    df_genetic = pd.DataFrame(
        transformed,
        columns=["gen_" + str(i) for i in range(transformed.shape[1])])
    df_genetic.index = train_X.index

    col_genetic = list(df_genetic.columns)
    ###################################################################################
    # Persist the artifacts only when both export paths are configured.
    if 'path_features_store' in pars and 'path_pipeline_export' in pars:
        save_features(df_genetic, 'df_genetic', pars['path_features_store'])
        save(gp, pars['path_pipeline_export'] + f"/{prefix}_model.pkl")
        save(col_genetic, pars['path_pipeline_export'] + f"/{prefix}.pkl")
        save(pars_genetic,
             pars['path_pipeline_export'] + f"/{prefix}_pars.pkl")

    col_pars = {'model': gp, 'pars': pars_genetic,
                'cols_new': {'col_genetic': col_genetic}}
    return df_genetic, col_pars
Example #14
0
 def feature_generate(self):
     """Evolve GP features and transform the stored train/test splits.

     Fits a SymbolicTransformer (Spearman metric) on the training split
     and caches the transformed train/test matrices on the instance.
     """
     # NOTE(review): population_size=10000 with 200 generations is very
     # expensive to evolve — confirm these settings are intentional.
     self.__genetic_transformer = SymbolicTransformer(population_size=10000,
                                                      generations=200,
                                                      tournament_size=200,
                                                      metric="spearman",
                                                      n_jobs=-1,
                                                      verbose=1)
     self.__genetic_transformer.fit(self.__train_feature,
                                    self.__train_label)
     self.__genetic_train_feature = self.__genetic_transformer.transform(
         self.__train_feature)
     self.__genetic_test_feature = self.__genetic_transformer.transform(
         self.__test_feature)
Example #15
0
def symbolic_features(p_x, p_y):
    """Fit a SymbolicTransformer to create non-linear regressors.

    Parameters
    ----------
    p_x: pd.DataFrame
        Regressors / predictor variables.

    p_y: pd.DataFrame
        Variable to predict.

    Returns
    -------
    dict with keys 'fit' (p_x horizontally stacked with the generated
    features), 'params' (the model's parameters) and 'model' (the fitted
    transformer).
    """
    operations = [
        "sub", "add", 'inv', 'mul', 'div', 'abs', 'log', "max", "min", "sin",
        "cos"
    ]
    model = SymbolicTransformer(function_set=operations,
                                population_size=5000,
                                hall_of_fame=100,
                                n_components=20,
                                generations=20,
                                tournament_size=20,
                                stopping_criteria=.05,
                                const_range=None,
                                init_depth=(4, 12),
                                metric='pearson',
                                parsimony_coefficient=0.001,
                                p_crossover=0.4,
                                p_subtree_mutation=0.2,
                                p_hoist_mutation=0.1,
                                p_point_mutation=0.3,
                                p_point_replace=.05,
                                verbose=1,
                                random_state=None,
                                n_jobs=-1,
                                feature_names=p_x.columns,
                                warm_start=True)

    # Fit only on the pre-2019 slice; the fit_transform result itself is
    # discarded — fitting the model is the point.
    model.fit_transform(p_x[:'01-01-2019'], p_y[:'01-01-2019'])
    model_params = model.get_params()
    gp_features = model.transform(p_x)
    return {
        'fit': np.hstack((p_x, gp_features)),
        'params': model_params,
        'model': model,
    }
Example #16
0
def main():
    """Load train/test samples, median-impute missing values and run the
    GP feature test harness."""
    with timer('读取文件时间'):
        train = pd.read_csv('train_541.csv', nrows=10000)
        test = pd.read_csv('test_541.csv', nrows=10000)
        print('Training set full shape: ', train.shape)
        print('Testing set full shape: ', test.shape)

    function_set = [
        'add', 'sub', 'mul', 'div', 'log', 'sqrt', 'abs', 'neg', 'max', 'min'
    ]

    gp1 = SymbolicTransformer(generations=1,
                              population_size=1000,
                              hall_of_fame=600,
                              n_components=100,
                              function_set=function_set,
                              parsimony_coefficient=0.0005,
                              max_samples=0.9,
                              verbose=1,
                              random_state=0,
                              n_jobs=3)

    # BUG FIX: the original called fillna('median') / fillna('mdeian'),
    # which fills NaNs with those literal strings rather than the column
    # medians (the 'mdeian' typo confirms median imputation was intended).
    train.fillna(train.median(numeric_only=True), inplace=True)
    test.fillna(test.median(numeric_only=True), inplace=True)

    print('填充完毕')

    with timer('test pg1'):
        test_gp(gp1, 100, train, test, foldername='pg1')
def test_pipeline():
    """Check that SymbolicRegressor/Transformer can work in a pipeline"""

    # Regressor behind a scaler.
    pipe = make_pipeline(StandardScaler(),
                         SymbolicRegressor(population_size=50,
                                           generations=5,
                                           tournament_size=5,
                                           random_state=0))
    pipe.fit(boston.data, boston.target)
    assert_almost_equal(pipe.score(boston.data, boston.target), -4.00270923)

    # Classifier behind a scaler.
    pipe = make_pipeline(StandardScaler(),
                         SymbolicClassifier(population_size=50,
                                            generations=5,
                                            tournament_size=5,
                                            random_state=0))
    pipe.fit(cancer.data, cancer.target)
    assert_almost_equal(pipe.score(cancer.data, cancer.target), 0.934973637961)

    # Transformer feeding a decision tree.
    pipe = make_pipeline(SymbolicTransformer(population_size=50,
                                             hall_of_fame=20,
                                             generations=5,
                                             tournament_size=5,
                                             random_state=0),
                         DecisionTreeRegressor())
    pipe.fit(boston.data, boston.target)
    assert_almost_equal(pipe.score(boston.data, boston.target), 1.0)
def test_pickle():
    """Check pickability"""

    # Regressor: a pickle round-trip preserves type and score.
    reg = SymbolicRegressor(generations=2, random_state=0)
    reg.fit(boston.data[:100, :], boston.target[:100])
    reg_clone = pickle.loads(pickle.dumps(reg))
    assert_equal(type(reg_clone), reg.__class__)
    assert_equal(reg.score(boston.data[500:, :], boston.target[500:]),
                 reg_clone.score(boston.data[500:, :], boston.target[500:]))

    # Transformer: the round-trip preserves transform output.
    trans = SymbolicTransformer(generations=2, random_state=0)
    trans.fit(boston.data[:100, :], boston.target[:100])
    trans_clone = pickle.loads(pickle.dumps(trans))
    assert_equal(type(trans_clone), trans.__class__)
    assert_array_almost_equal(trans.transform(boston.data[500:, :]),
                              trans_clone.transform(boston.data[500:, :]))

    # Classifier: the round-trip preserves type and score.
    clf = SymbolicClassifier(generations=2, random_state=0)
    clf.fit(cancer.data[:100, :], cancer.target[:100])
    clf_clone = pickle.loads(pickle.dumps(clf))
    assert_equal(type(clf_clone), clf.__class__)
    assert_equal(clf.score(cancer.data[500:, :], cancer.target[500:]),
                 clf_clone.score(cancer.data[500:, :], cancer.target[500:]))
def test_parallel_train():
    """Check predictions are the same for different n_jobs"""

    # Regressor: predictions and program lengths must not depend on n_jobs.
    fitted = []
    for jobs in [1, 2, 3, 8, 16]:
        est = SymbolicRegressor(population_size=100, generations=4,
                                n_jobs=jobs, random_state=0)
        fitted.append(est.fit(boston.data[:100, :], boston.target[:100]))

    predictions = [e.predict(boston.data[500:, :]) for e in fitted]
    for first, second in zip(predictions, predictions[1:]):
        assert_array_almost_equal(first, second)
    sizes = np.array([[p.length_ for p in e._programs[-1]] for e in fitted])
    for first, second in zip(sizes, sizes[1:]):
        assert_array_almost_equal(first, second)

    # Transformer: same invariance for the transform output.
    fitted = []
    for jobs in [1, 2, 3, 8, 16]:
        est = SymbolicTransformer(population_size=100, hall_of_fame=50,
                                  generations=4, n_jobs=jobs,
                                  random_state=0)
        fitted.append(est.fit(boston.data[:100, :], boston.target[:100]))

    outputs = [e.transform(boston.data[500:, :]) for e in fitted]
    for first, second in zip(outputs, outputs[1:]):
        assert_array_almost_equal(first, second)
    sizes = np.array([[p.length_ for p in e._programs[-1]] for e in fitted])
    for first, second in zip(sizes, sizes[1:]):
        assert_array_almost_equal(first, second)
Example #20
0
    def fit(self, X, y=None, state={}):
        """Evolve a SymbolicTransformer on (X, y) and store the fitted
        model under ``self.state['genetic']['fit']``.

        The ``state`` parameter is unused; it is kept (with its original
        default) for interface compatibility. Returns self.
        """
        exp_fn = make_function(function=exponent, name='exp', arity=1)

        ops = ['add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg',
               'inv', 'max', 'min', 'tan', 'sin', 'cos', exp_fn]

        transformer = SymbolicTransformer(
            generations=self.generations,
            population_size=self.population,
            hall_of_fame=self.hall_of_fame,
            n_components=self.components,
            function_set=ops,
            parsimony_coefficient='auto',
            max_samples=0.6,
            verbose=1,
            metric=self.metric,
            random_state=0,
            n_jobs=7)

        self.state['genetic'] = {'fit': transformer.fit(X, y)}

        return self
Example #21
0
def test_early_stopping():
    """Check that early stopping works"""

    # A trivially satisfiable criterion stops the regressor after gen 0.
    reg = SymbolicRegressor(stopping_criteria=10, random_state=0)
    reg.fit(boston.data[:400, :], boston.target[:400])
    assert_true(len(reg._programs) == 1)

    # Same for the transformer.
    trans = SymbolicTransformer(stopping_criteria=0.5, random_state=0)
    trans.fit(boston.data[:400, :], boston.target[:400])
    assert_true(len(trans._programs) == 1)
Example #22
0
def symbolicLearning(df_list):
    '''
    Transform *df_list* with a genetic-programming SymbolicTransformer.

    :param df_list: iterable convertible to a DataFrame of numeric features.
    :return: DataFrame of transformed features named '1V'..'nV'.
    '''
    df_list = pd.DataFrame(df_list)
    function_set = ['add', 'sub', 'mul', 'div', 'log', 'sqrt', 'abs', 'neg', 'max', 'min']

    gp = SymbolicTransformer(generations=10, population_size=1000,
                              hall_of_fame=100, n_components=10,
                              function_set=function_set,
                              parsimony_coefficient=0.0005,
                              max_samples=0.9, verbose=1,
                              random_state=0, n_jobs=3)
    # NOTE(review): gp is never fitted before transform(); gplearn raises
    # NotFittedError here. A target series would be needed to call
    # gp.fit() — confirm the intended usage with the caller.
    gp_feature = gp.transform(df_list)
    # Name columns from the actual output width (previously derived from
    # len(function_set), which only matched n_components by coincidence).
    new_feature_name = [str(i) + 'V' for i in range(1, gp_feature.shape[1] + 1)]
    new_feature = pd.DataFrame(gp_feature, columns=new_feature_name)
    return new_feature
Example #23
0
    def data_prepare(self):
        """Load the Boston data, split it 50/50, fit a SymbolicTransformer
        on the train half and cache the GP features for both halves."""
        self.__boston = load_boston()
        self.__boston_feature = pd.DataFrame(
            self.__boston.data, columns=self.__boston.feature_names)
        self.__boston_label = pd.Series(
            self.__boston.target).to_frame("TARGET").squeeze()

        self.__train_feature, self.__test_feature, self.__train_label, self.__test_label = (
            train_test_split(self.__boston_feature,
                             self.__boston_label,
                             test_size=0.5,
                             shuffle=True))

        # No missing values allowed (the transformer cannot handle NaNs).
        self.__transformer = SymbolicTransformer(n_jobs=4)
        self.__transformer.fit(self.__train_feature, self.__train_label)
        self.__gp_train_feature = self.__transformer.transform(
            self.__train_feature)
        self.__gp_test_feature = self.__transformer.transform(
            self.__test_feature)
def test_function_in_program():
    """Check that using a custom function in a program works"""
    def logic(x1, x2, x3, x4):
        return np.where(x1 > x2, x3, x4)

    logical = make_function(function=logic, name='logical', arity=4)
    function_set = ['add', 'sub', 'mul', 'div', logical]
    est = SymbolicTransformer(generations=2,
                              population_size=2000,
                              hall_of_fame=100,
                              n_components=10,
                              function_set=function_set,
                              parsimony_coefficient=0.0005,
                              max_samples=0.9,
                              random_state=0)
    est.fit(boston.data[:300, :], boston.target[:300])

    formula = est._programs[0][906].__str__()
    expected_formula = 'sub(logical(X6, add(X11, 0.898), X10, X2), X5)'
    # Fixed: a stray third positional argument (`True`) was passed to
    # assert_equal, where it was silently consumed as the failure message.
    assert_equal(expected_formula, formula)
    def data_prepare(self):
        """Load two-class digits data, split, standardize, then append
        gplearn GP features to the scaled train/test matrices."""
        self.__digists = load_digits(n_class=2)
        self.__X = self.__digists.data
        self.__y = self.__digists.target

        self.__train, self.__test, self.__train_label, self.__test_label = train_test_split(
            self.__X, self.__y, test_size=0.2, random_state=9)

        # standard scaler
        scaler = StandardScaler().fit(self.__train)
        self.__train = scaler.transform(self.__train)
        self.__test = scaler.transform(self.__test)

        # gp feature
        function_set = ("add", "sub", "mul", "div", "sqrt", "log", "abs",
                        "neg", "inv", "max", "min")

        gp = SymbolicTransformer(generations=5,
                                 population_size=2000,
                                 hall_of_fame=100,
                                 n_components=10,
                                 function_set=function_set,
                                 parsimony_coefficient=0.0005,
                                 max_samples=0.9,
                                 verbose=1,
                                 random_state=0,
                                 n_jobs=3)

        # Obtaining the generic features via stacking would arguably be
        # more sound (translated from the original Chinese note).
        gp.fit(self.__train, self.__train_label)
        self.__train_gfeature = np.hstack(
            (self.__train, gp.transform(self.__train)))
        self.__test_gfeature = np.hstack(
            (self.__test, gp.transform(self.__test)))
Example #26
0
def test_custom_functions():
    """Test the custom programs example works"""

    rng = check_random_state(0)
    boston = load_boston()
    order = rng.permutation(boston.target.size)
    boston.data, boston.target = boston.data[order], boston.target[order]

    def logic(x1, x2, x3, x4):
        return np.where(x1 > x2, x3, x4)

    logical = make_function(function=logic, name='logical', arity=4)

    gp = SymbolicTransformer(generations=2,
                             population_size=2000,
                             hall_of_fame=100,
                             n_components=10,
                             function_set=['add', 'sub', 'mul', 'div',
                                           logical],
                             parsimony_coefficient=0.0005,
                             max_samples=0.9,
                             random_state=0)

    gp.fit(boston.data[:300, :], boston.target[:300])

    # A known program from generation 0 must render the expected formula.
    assert_equal(str(gp._programs[0][906]),
                 'sub(logical(X6, add(X11, 0.898), X10, X2), X5)')

    # And its graphviz export must match exactly.
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="logical", '
                'fillcolor="#136ed4"] ;\n2 [label="X6", fillcolor="#60a6f6"] '
                ';\n3 [label="add", fillcolor="#136ed4"] ;\n4 [label="X11", '
                'fillcolor="#60a6f6"] ;\n5 [label="0.898", '
                'fillcolor="#60a6f6"] ;\n3 -> 5 ;\n3 -> 4 ;\n6 [label="X10", '
                'fillcolor="#60a6f6"] ;\n7 [label="X2", fillcolor="#60a6f6"] '
                ';\n1 -> 7 ;\n1 -> 6 ;\n1 -> 3 ;\n1 -> 2 ;\n8 [label="X5", '
                'fillcolor="#60a6f6"] ;\n0 -> 8 ;\n0 -> 1 ;\n}')
    assert_equal(gp._programs[0][906].export_graphviz(), expected)
def gp_features(df,
                target,
                random_state,
                generations=5,
                function_set=('add', 'sub', 'mul', 'div')):
    """Fit a SymbolicTransformer on *df* (minus *target*) and apply it.

    Args:
        df (pd.DataFrame): input data including the target column.
        target (str): name of the target column.
        random_state: seed forwarded to the transformer.
        generations (int): number of GP generations to evolve.
        function_set: operators for the transformer. The default is now an
            immutable tuple — the previous list literal was a mutable
            default argument (shared across calls); elsewhere in this file
            a tuple function_set is already used.

    Returns:
        tuple: (df transformed via gp_transform, gp.transform callable).
    """
    X = df.loc[:, (df.columns != target)]
    y = df.loc[:, target]

    gp = SymbolicTransformer(generations=generations,
                             population_size=1000,
                             hall_of_fame=100,
                             n_components=12,
                             function_set=function_set,
                             parsimony_coefficient=0.0005,
                             max_samples=0.9,
                             verbose=0,
                             random_state=random_state,
                             n_jobs=-1)
    # One-hot encode categoricals before fitting.
    gp.fit(pd.get_dummies(X), y)
    df = gp_transform(df, gp.transform, X)

    return df, gp.transform
Example #28
0
def get_feature_symbolic_learning(df, gp_config):
    """

    Parameters
    ----------
    df: pd.DataFrame,the input dataFrame.
    gp_config: GPConfig object, the config object of gplearn.SymbolicTransformer.

    Returns
    -------
    df_t: pd.DataFrame, df with the features of SymbolicTransformer trans.
        The new features named like 'symbolic_component_{0 to n}'(n is the n_components)
    """

    gp = SymbolicTransformer(
        generations=gp_config.generation,
        population_size=gp_config.population_size,
        hall_of_fame=gp_config.hall_of_fame,
        n_components=gp_config.n_components,
        function_set=gp_config.function_set,
        parsimony_coefficient=gp_config.parsimony_coefficient,
        max_samples=gp_config.max_samples,
        verbose=1,
        random_state=0,
        n_jobs=3)

    X = df[gp_config.feature_cols]
    y = df[gp_config.target_col]
    # Fit on a random 80% subsample to limit overfitting; the held-out
    # 20% is intentionally unused here.  NOTE(review): no random_state is
    # passed, so the subsample (and thus the evolved features) is not
    # reproducible across runs — confirm whether that is intended.
    X_train, _, y_train, _ = train_test_split(X,
                                              y,
                                              test_size=0.2,
                                              shuffle=True)
    gp.fit(X_train, y_train)
    names = [
        "symbolic_component_" + str(i) for i in range(gp_config.n_components)
    ]
    # BUG FIX: build the result on df's own index.  Previously `res` got a
    # default RangeIndex, so pd.concat(axis=1) misaligned rows (producing
    # NaNs) whenever df carried a non-default index.
    res = pd.DataFrame(gp.transform(X), columns=names, index=df.index)
    df_t = pd.concat([df, res], axis=1)
    return df_t
Example #29
0
def test_early_stopping():
    """Check that early stopping works"""

    # Every estimator below hits its stopping criterion in the first
    # generation, so exactly one generation of programs is recorded
    # despite generations=2.
    cases = [
        (SymbolicRegressor(population_size=100, generations=2,
                           stopping_criteria=10, random_state=0),
         boston.data[:400, :], boston.target[:400]),
        (SymbolicTransformer(population_size=100, generations=2,
                             stopping_criteria=0.5, random_state=0),
         boston.data[:400, :], boston.target[:400]),
        (SymbolicClassifier(population_size=100, generations=2,
                            stopping_criteria=.9, random_state=0),
         cancer.data[:400, :], cancer.target[:400]),
    ]
    for estimator, features, labels in cases:
        estimator.fit(features, labels)
        assert(len(estimator._programs) == 1)
Example #30
0
def test_input_shape():
    """Check changed dimensions cause failure"""

    rng = check_random_state(415)
    X_fit = np.reshape(rng.uniform(size=50), (5, 10))
    y_fit = rng.uniform(size=5)
    # Same row count but one fewer feature column than the fit data.
    X_narrow = np.reshape(rng.uniform(size=45), (5, 9))

    # Both estimator types must reject inputs whose feature dimension
    # differs from what they were fitted on.
    for estimator_cls, method_name in ((SymbolicRegressor, 'predict'),
                                       (SymbolicTransformer, 'transform')):
        estimator = estimator_cls(generations=2, random_state=0)
        estimator.fit(X_fit, y_fit)
        assert_raises(ValueError, getattr(estimator, method_name), X_narrow)
Example #31
0
def test_custom_transformer_metrics():
    """Check whether greater_is_better works for SymbolicTransformer."""

    # Baseline: evolve with the built-in 'pearson' metric (maximised).
    est_gp = SymbolicTransformer(generations=2,
                                 population_size=100,
                                 hall_of_fame=10,
                                 n_components=1,
                                 metric='pearson',
                                 random_state=415)
    est_gp.fit(boston.data, boston.target)
    # With n_components=1 this loop leaves `formula` bound to the single
    # evolved program's string representation.
    for program in est_gp:
        formula = program.__str__()
    expected_formula = ('sub(div(mul(X4, X12), div(X9, X9)), '
                        'sub(div(X11, X12), add(X12, X0)))')
    assert_equal(expected_formula, formula, True)

    def _neg_weighted_pearson(y, y_pred, w):
        """Calculate the weighted Pearson correlation coefficient."""
        # Negated on purpose: paired with greater_is_better=False below,
        # minimising this should be equivalent to maximising 'pearson'.
        with np.errstate(divide='ignore', invalid='ignore'):
            y_pred_demean = y_pred - np.average(y_pred, weights=w)
            y_demean = y - np.average(y, weights=w)
            corr = (
                (np.sum(w * y_pred_demean * y_demean) / np.sum(w)) / np.sqrt(
                    (np.sum(w * y_pred_demean**2) * np.sum(w * y_demean**2)) /
                    (np.sum(w)**2)))
        if np.isfinite(corr):
            return -1 * np.abs(corr)
        # Degenerate (zero-variance) case: report no correlation.
        return 0.

    neg_weighted_pearson = make_fitness(function=_neg_weighted_pearson,
                                        greater_is_better=False)

    # Same seed, same search, but minimising the negated custom metric;
    # the evolved formula must match the baseline exactly.
    c_est_gp = SymbolicTransformer(generations=2,
                                   population_size=100,
                                   hall_of_fame=10,
                                   n_components=1,
                                   stopping_criteria=-1,
                                   metric=neg_weighted_pearson,
                                   random_state=415)
    c_est_gp.fit(boston.data, boston.target)
    for program in c_est_gp:
        c_formula = program.__str__()
    assert_equal(expected_formula, c_formula, True)
Example #32
0
def getSymbolTrans(train, valid, y, random_state=888):
    """Fit a gplearn SymbolicTransformer on the training split and append
    its evolved features (columns ``ST_1`` .. ``ST_n``) to both splits.

    Parameters
    ----------
    train, valid : pd.DataFrame
        Training and validation feature frames.
    y : pd.Series or array-like
        Training target aligned with *train*.
    random_state : int, optional
        Seed for the transformer (default 888).

    Returns
    -------
    (pd.DataFrame, pd.DataFrame)
        Copies of *train* and *valid* with the new feature columns joined
        on; any missing values introduced by the join are filled with 0.
    """
    X_train = train.copy()
    X_valid = valid.copy()
    y_train = y.copy()
    function_set = [
        'add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv', 'max',
        'min'
    ]

    gp = SymbolicTransformer(generations=20,
                             population_size=2000,
                             hall_of_fame=100,
                             n_components=10,
                             function_set=function_set,
                             parsimony_coefficient=0.0005,
                             max_samples=0.9,
                             verbose=0,
                             # BUG FIX: the random_state parameter was
                             # previously ignored (hard-coded to 0 here).
                             random_state=random_state,
                             n_jobs=3)

    gp.fit(X_train, y_train)

    X_train = _join_gp_features(X_train, gp)
    X_valid = _join_gp_features(X_valid, gp)

    return (X_train, X_valid)


def _join_gp_features(frame, gp):
    """Transform *frame* with the fitted transformer *gp* and join the
    resulting columns (named ST_1..ST_n) onto it, filling NaNs with 0."""
    transformed = gp.transform(frame)
    # BUG FIX: align on frame's own index.  Previously the feature frame
    # got a default RangeIndex, so .join() silently misaligned rows for
    # any non-default index (the NaNs were then masked by fillna(0)).
    gp_frame = pd.DataFrame(
        transformed,
        index=frame.index,
        columns=["ST_" + str(i) for i in range(1, transformed.shape[1] + 1)])
    return frame.join(gp_frame).fillna(0)