def test_pickle():
    """Check pickability"""

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    score = est.score(boston.data[500:, :], boston.target[500:])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    score2 = est2.score(boston.data[500:, :], boston.target[500:])
    assert_equal(score, score2)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    X_new = est.transform(boston.data[500:, :])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    X_new2 = est2.transform(boston.data[500:, :])
    assert_array_almost_equal(X_new, X_new2)

    # Check the classifier
    est = SymbolicClassifier(generations=2, random_state=0)
    est.fit(cancer.data[:100, :], cancer.target[:100])
    score = est.score(cancer.data[500:, :], cancer.target[500:])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    score2 = est2.score(cancer.data[500:, :], cancer.target[500:])
    assert_equal(score, score2)
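The test above assumes module-level fixtures: the scikit-learn example datasets and the assertion helpers. A minimal sketch of that setup (an assumption, not the original test module; note that load_boston was removed in scikit-learn 1.2, so an older release or a local copy of the data is needed):

import pickle

from numpy.testing import assert_array_almost_equal, assert_equal
from sklearn.datasets import load_boston, load_breast_cancer

from gplearn.genetic import (SymbolicClassifier, SymbolicRegressor,
                             SymbolicTransformer)

boston = load_boston()
cancer = load_breast_cancer()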
def train():
    est_gp = SymbolicRegressor(population_size=150,
                               generations=20, stopping_criteria=0.001,
                               p_crossover=0.8, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.05,
                               max_samples=0.9, verbose=1, metric='mean absolute error',
                               parsimony_coefficient=0.01)
    est_gp.fit(X_train, y_train)
    print(est_gp._program)
    print(est_gp.score(X_train, y_train))
def train(x,y_truth,X_train,y_train,X_test,y_test,target_func,noise_rate,noise_level):
    """
    x:  目标函数的分布范围
    y_truth: 目标函数的真实值
    X_train: 训练数据
    y_train: 训练数据值(带噪声)
    X_test: 测试数据
    y_test: 测试数据值
    noise_rate: 噪声率
    noise_level: 噪声水平
    得出用所有数据进行训练的拟合结果。拟合效果有可能会受噪声数据的影响
    """
    # Inspect the data used for training
    print('--- Training data ---')
    print(np.c_[X_train, y_train])
    # Define the symbolic regressor
    est_gp = SymbolicRegressor(population_size=5000,
                           function_set=['add', 'sub', 'mul', 'div'],  # optionally: 'sin', 'cos', 'sqrt', 'log', 'abs', 'neg', 'inv', 'tan'
                           generations=10, stopping_criteria=0.01,
                           p_crossover=0.7, p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05, p_point_mutation=0.1,
                           max_samples=0.9, verbose=1,metric='mean absolute error',
                           parsimony_coefficient=0.01, random_state=0,const_range=(-1,1))
    # Fit on the training set
    est_gp.fit(X_train.reshape(-1, 1), y_train)
    # Predictions on the test data
    y_pred = est_gp.predict(X_test.reshape(-1, 1))
    # R^2 on the test data
    score_gp = est_gp.score(X_test.reshape(-1, 1), y_test)
    # Mean squared error on the test set
    test_mse = mean_squared_error(y_test, y_pred)
    print('Fitted program :', str(est_gp._program))
    print('R^2 : %.6f' % score_gp)
    print('MSE : %.6f' % test_mse)
    
    # Plot the target curve
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)
    plt.plot(x, y_truth, label=target_func)
    plt.legend(loc='best', fontsize=18)

    # Plot the training data
    plt.scatter(X_train, y_train, label='NoisyData', alpha=0.9)
    plt.legend(loc='best', fontsize=18)

    # Plot the fitted curve (sorted by x so the line draws left to right)
    data = np.c_[X_test, y_pred]
    data = data[np.lexsort(data[:, ::-1].T)]
    plt.plot(data[:, 0], data[:, 1], label='GP : ' + str(est_gp._program))

    # Title with the test-set metrics
    fmt = '$R^2 =\/ {0:.6f}$ , $MSE =\/ {1:.6f}$'.format(score_gp, test_mse)
    plt.title(fmt, fontsize=20)
    plt.legend(loc = 'best',fontsize = 18)
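A hypothetical driver for train(), with every name below illustrative rather than taken from the original script: sample a target function, corrupt a fraction of the training targets with noise, and fit.

import numpy as np

np.random.seed(0)
x = np.linspace(-1, 1, 200)
y_truth = x ** 2 + x                        # target function values
X_train = np.random.uniform(-1, 1, 50)
y_train = X_train ** 2 + X_train
noise_rate, noise_level = 0.2, 1.0
noisy = np.random.choice(50, int(50 * noise_rate), replace=False)
y_train[noisy] += noise_level * np.random.randn(noisy.size)  # inject noise
X_test = np.random.uniform(-1, 1, 50)
y_test = X_test ** 2 + X_test

train(x, y_truth, X_train, y_train, X_test, y_test,
      '$x^2 + x$', noise_rate, noise_level)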
Example #5
def main():
    x = np.genfromtxt('x_train.csv', delimiter=',').reshape((1000, 1))
    y = np.genfromtxt('y_train.csv', delimiter=',')
    est_gp = SymbolicRegressor(population_size=50,
                               generations=20,
                               stopping_criteria=0.01,
                               p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1,
                               max_samples=0.9,
                               verbose=1,
                               parsimony_coefficient=0.01,
                               random_state=0)
    est_gp.fit(x, y)
    print(est_gp._program)

    est_tree = DecisionTreeRegressor()
    est_tree.fit(x, y)
    est_rf = RandomForestRegressor()
    est_rf.fit(x, y)

    x0 = np.arange(-1, 1, 1 / 10.)
    x1 = np.arange(-1, 1, 1 / 10.)
    x0, x1 = np.meshgrid(x0, x1)
    y_truth = 3 * x0**2 + 5 * x0 + 1  # exact function we are estimating

    y_gp = est_gp.predict(np.c_[x0.ravel()]).reshape(x0.shape)
    score_gp = est_gp.score(x, y)
    y_tree = est_tree.predict(np.c_[x0.ravel()]).reshape(x0.shape)
    score_tree = est_tree.score(x, y)
    y_rf = est_rf.predict(np.c_[x0.ravel()]).reshape(x0.shape)
    score_rf = est_rf.score(x, y)

    for i, (ys, score,
            title) in enumerate([(y_truth, None, "Ground Truth"),
                                 (y_gp, score_gp, "SymbolicRegressor"),
                                 (y_tree, score_tree, "DecisionTreeRegressor"),
                                 (y_rf, score_rf, "RandomForestRegressor")]):
        plt.subplot(2, 2, i + 1)
        if score is not None:
            title += " (R^2 = %.4f)" % score
        plt.title(title)
        plt.plot(x0, ys, 'C0o')
        plt.grid(True, which='both')
        plt.axhline(y=0, color='k')
        plt.axvline(x=0, color='k')
    plt.show()
Example #7
def test_symbolic_regression(plotOnly=True):
    nsample = 4000
    sig = 0.2
    x = np.linspace(-50,50,nsample)
    X = np.column_stack(
        (
            x/5, 
            10*np.sin(x), 
            (x-5)**3, 
            np.ones(nsample)
        )
    )
    beta = [0.01, 1, 0.001, 5.]

    y_true = np.dot(X,beta)
    y = y_true + sig * np.random.normal(size=nsample)

    df = pd.DataFrame()
    df["x"] = x; df["y"] = y

    fig,ax = plt.subplots()
    ax.plot(df.x, df.y, c="k", ls="--",label="Truth")
    ax.set_title("Ground truth")
    
    X = df[["x"]]; y = df.y

    #Split into train test data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=constant
    )

    #Converter to print the function using sympy
    converter = {
        'sub': lambda x, y : x - y,
        'div': lambda x, y : x/y,
        'mul': lambda x, y : x*y,
        'add': lambda x, y : x + y,
        'neg': lambda x    : -x,
        'pow': lambda x, y : x**y,
        'sin': lambda x    : sin(x),
        'cos': lambda x    : cos(x),
        'inv': lambda x: 1/x,
        'sqrt': lambda x: x**0.5,
        'pow3': lambda x: x**3
    }

    if not plotOnly:
        #Train the regressor
        function_set = [
            "add", "sub", "mul", "div", "cos", "sin", "neg", "inv"
        ]

        #Instantiate the symbolic regression
        SR = SymbolicRegressor(
            population_size=5000,
            function_set=function_set,
            generations=5,
            stopping_criteria=0.01,
            p_crossover=0.7,
            p_subtree_mutation=0.1,
            p_hoist_mutation=0.05,
            p_point_mutation=0.1,
            max_samples=0.9,
            verbose=1,
            parsimony_coefficient=0.001,
            random_state=0,
            feature_names=X_train.columns
        )
        
        #Fit vs. training data
        SR.fit(X_train, y_train)  

        dump(SR,"./SR_Test.bin")

    try:
        SR = load("./SR_Test.bin")
    except:
        raise IOError("./SR_Test.bin does not exist, train a model first")

    print(
        "R$^2$: %s"%(SR.score(X_test, y_test))
    )

    #Write the function expression
    func = sympify(
        (SR._program), locals=converter
    )
    print(func)

    with open("./test_print_formula.txt","w") as f:
        f.write(str(func))

    #Predict using trained SR
    y_pred = SR.predict(df.x.to_numpy().reshape(-1,1))

    #Overlay the plot
    ax.scatter(
        df.x, y_pred, label="SR"
    )
    ax.legend()
    plt.show()
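The converter dictionary above exists only so sympy can parse gplearn's program string: sympify() resolves each primitive name through locals, and the lambdas rebuild the expression symbolically. A self-contained sketch with a hand-written program string:

from sympy import sin, sympify

converter = {
    'add': lambda x, y: x + y,
    'mul': lambda x, y: x * y,
    'sin': lambda x: sin(x),
}
expr = sympify('add(mul(X0, X0), sin(X0))', locals=converter)
print(expr)  # X0**2 + sin(X0)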
Example #8
        # (Truncated fragment: the preceding branch trains SR and caches it;
        # mmy below is the target scaler fitted earlier in the script.)
        dump(SR, "./TrainedRegressor.pkl")
    else:
        SR = load("./TrainedRegressor.pkl")

    #Print-out the regression formula
    formula = sympify(
        (SR._program), 
        locals=converter
    )

    #Print formula, R2 and save it as a txt
    print("Formula: ", formula)

    print('R$^2$:',SR.score(
        X_test,        #scaled X_test 
        mmy.transform(
                y_test #scaled y_test
            )
        )
    )

    with open("./formula.txt","w") as f:
        f.write(str(formula))

    #Predict the test data
    y_pred = SR.predict(X_test).reshape(-1,1)

    #Scale back y_pred
    y_pred = mmy.inverse_transform(y_pred)

    #Dump y_pred vs y_test
    df = pd.DataFrame()
Example #9
    def GP(self):
        data_training = pd.DataFrame(self.GP_training)
        # data_training.columns = data_training.columns
        target_training = pd.DataFrame(self.training['result'])
        target_training.columns = ['result']

        # symbolic regression
        function_set = ('add', 'sub', 'mul', 'div', 'sqrt', 'max', 'min')
        sr = SymbolicRegressor(population_size=50000,
                               generations=10,
                               stopping_criteria=0.01,
                               function_set=function_set,
                               p_crossover=0.1,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.15,
                               p_point_mutation=0.2,
                               max_samples=0.7,
                               verbose=1,
                               parsimony_coefficient=0.025,
                               random_state=0)
        sr.fit(data_training, target_training)

        # check results
        # Returns the coefficient of determination R^2 of the prediction.
        self.logger.write(str(sr.score(data_training, target_training)))
        self.logger.write("\n")

        data_test = pd.DataFrame(self.GP_testing)
        target_test = pd.DataFrame(self.test['result'])
        predict_test = sr.predict(data_test)
        pre = list()
        for i in predict_test:
            if i < 0:
                pre.append(-1)
            elif i > 0:
                pre.append(1)
            else:
                pre.append(0)
        predict_test = np.asarray(pre)

        target_test.columns = ['result']

        self.logger.write(" 1.1- f1 score for GP: ")
        self.logger.write("\n")
        self.logger.write(f1_score(target_test, predict_test, average='macro'))
        self.logger.write("\n")
        self.logger.write(" 1.2- f1 score for GP None: ")
        self.logger.write("\n")
        self.logger.write(f1_score(target_test, predict_test, average=None))
        self.logger.write("\n")

        # Calculate the accuracy
        self.logger.write("2 - accuracy score for GP: ")
        self.logger.write("\n")
        self.logger.write(
            accuracy_score(target_test, predict_test, normalize=True))
        self.logger.write("\n")

        # KFold cross-validation
        kf = KFold(n_splits=10, shuffle=False)

        # Per-fold accuracies; each fold's accuracy is appended to this list
        accuracy_model = []
        index = 1
        # Iterate over each train-test split
        for train_index, test_index in kf.split(data_test):
            # Split train-test
            X_train, X_test = data_test.iloc[train_index], data_test.iloc[
                test_index]
            y_train, y_test = target_test.iloc[train_index], target_test.iloc[
                test_index]
            # Train the model
            sr.fit(X_train, y_train)
            # Append to accuracy_model the accuracy of the model
            predict_test = sr.predict(X_test)
            pre = list()
            for i in predict_test:
                if i < 0:
                    pre.append(-1)
                elif i > 0:
                    pre.append(1)
                else:
                    pre.append(0)
            predict_test = np.asarray(pre)
            acc = accuracy_score(y_test, predict_test, normalize=True) * 100
            self.logger.write("K-fold number %d, accuracy_score %d", index,
                              acc)
            self.logger.write("\n")
            accuracy_model.append(acc)
            index += 1

        # Print the accuracy
        self.logger.write("3 - K-Fold Cross Validation for GP: ")
        self.logger.write("\n")
        self.logger.write(str(accuracy_model))
        self.logger.write("\n")
Example #10
                           verbose=1,
                           parsimony_coefficient=0.01,
                           random_state=0)
est_gp.fit(X_train, y_train)
print(est_gp._program)

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

est_tree = DecisionTreeRegressor()
est_tree.fit(X_train, y_train)
est_rf = RandomForestRegressor()
est_rf.fit(X_train, y_train)

y_gp = est_gp.predict(np.c_[x_0.ravel(), x_1.ravel()]).reshape(x_0.shape)
score_gp = est_gp.score(X_test, y_test)
y_tree = est_tree.predict(np.c_[x_0.ravel(), x_1.ravel()]).reshape(x_0.shape)
score_tree = est_tree.score(X_test, y_test)
y_rf = est_rf.predict(np.c_[x_0.ravel(), x_1.ravel()]).reshape(x_0.shape)
score_rf = est_rf.score(X_test, y_test)

fig = plt.figure(figsize=(8, 6))

for i, (y, score,
        title) in enumerate([(y_truth, None, "Ground Truth"),
                             (y_gp, score_gp, "SymbolicRegressor"),
                             (y_tree, score_tree, "DecisionTreeRegressor"),
                             (y_rf, score_rf, "RandomForestRegressor")]):

    ax = fig.add_subplot(2, 2, i + 1, projection='3d')
    ax.set_xlim(-1, 1)
Example #11

                           comparison=True,
                           transformer=True,
                           p_crossover=0.7,
                           p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05,
                           p_point_mutation=0.1,
                           max_samples=0.9,
                           verbose=1,
                           const_range=(-20.0, 20.0),
                           parsimony_coefficient=0.01,
                           random_state=1,
                           metric='mse')

est_gp.fit(data_train, data1_train)
print(est_gp._program)
score_gp = est_gp.score(data_test, data1_test)
print(score_gp)
p = est_gp.predict(features)

xc = np.arange(0, len(labels1), 1)
xa = np.arange(0, x, 1)
xb = np.arange(x - 1, len(labels1), 1)
#print(xb.size)
if xb.size != len(p[x - 1:]):
    xb = np.arange(x - 1, len(labels1) - 1, 1)

#print(xa.size, xb.size, xc.size)
print(len(p[0:x]), len(p[x - 1:]), len(labels1))
print(p, labels1)

fig = plt.figure()
Example #12
#function_set = [logic_and, logic_not,logic_or]
function_set = [logic_and,logic_or,logic_xor,logic_not]
est_gp = SymbolicRegressor(population_size=100,
                           generations=500,
                           #stopping_criteria=0.01,
                           tournament_size=2,
                           function_set= function_set,
                           parsimony_coefficient=0.009,
                           max_samples=1.0,
                           verbose=1,
                           p_crossover=0.9, p_subtree_mutation=0.1,
                           p_hoist_mutation=0.0, p_point_mutation=0.0,
                           n_jobs=-1)


est_gp.fit(X,Y)
print(est_gp._program)
print("-------------------------------")
#print(est_gp._programs)
score_gp = est_gp.score(X, Y)
print(score_gp)
graph = pydotplus.graphviz.graph_from_dot_data(est_gp._program.export_graphviz())
graph.write_svg('test.svg')
#res = Image(graph.create_png())
#display(res)
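The custom operators logic_and, logic_or, logic_xor and logic_not are not shown in this snippet. One plausible way to build them with gplearn's make_function, treating any value above 0.5 as true (the names come from the snippet; the thresholding convention is an assumption):

import numpy as np
from gplearn.functions import make_function

def _and(x, y):
    return np.logical_and(x > 0.5, y > 0.5).astype(float)

def _or(x, y):
    return np.logical_or(x > 0.5, y > 0.5).astype(float)

def _xor(x, y):
    return np.logical_xor(x > 0.5, y > 0.5).astype(float)

def _not(x):
    return (x <= 0.5).astype(float)

logic_and = make_function(function=_and, name='and', arity=2)
logic_or = make_function(function=_or, name='or', arity=2)
logic_xor = make_function(function=_xor, name='xor', arity=2)
logic_not = make_function(function=_not, name='not', arity=1)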




def ransac(x,y_truth,X_train,y_train,X_test,y_test,target_func,noise_rate,noise_level):
    """
    x:  目标函数的分布范围
    y_truth: 目标函数的真实值
    X_train: 训练数据
    y_train: 训练数据值(带噪声)
    X_test: 测试数据
    target_func :目标函数表达式
    y_test: 测试数据值
    noise_rate: 噪声率
    noise_level: 噪声水平
    利用部分数据集进行训练,并根据拟合结果来重新选择训练数据,
    通过这种方式来提高对噪声的鲁棒性。    
    """
    
    # Maximum number of iterations
    max_iter = 5
    # Dataset size
    length = X_train.shape[0]
    # Noisy targets
    y_noise = y_train
    # Number of noisy points
    noise_count = int(length*noise_rate)
    # Number of points to train on: keep (1 - lambda)*length points when the
    # noise rate lambda is at most 0.5, otherwise keep length/2 points
    if noise_rate <= 0.5:
        pure_count = length - noise_count
    else:
        pure_count = length//2
    # Iteration counter
    count = 0
    # Test-set R^2 scores
    test_score = []
    # Test-set mean squared errors
    test_mse = []
    # Fitted expressions, one per round
    result = []
    # Full training set
    train_data = np.c_[X_train, y_noise]
    print('------ All training data ------')
    print(train_data)
    print('-------------------------------')

    # Randomly sample the initial training set: shuffle an index list and
    # take the first pure_count entries
    lst = list(range(length))
    np.random.shuffle(lst)
    # Initial training data: as many points as are assumed noise-free
    random_train_data = train_data[lst[:pure_count]]
    # Keep the training set used in each round
    data_list = [0]*max_iter
    
    while count < max_iter:
        # Save the training set used this round
        data_list[count] = random_train_data

        print('------------------------------ Round ' + str(count) + ' ------------------------------')
        print('---- Data used this round ----')
        print(random_train_data)
        print('------------------------------')
        # Symbolic regressor
        est_gp = SymbolicRegressor(population_size=5000,
                           function_set=['add', 'sub', 'mul', 'div'],  # optionally: 'sin', 'cos', 'sqrt', 'log', 'abs', 'neg', 'inv', 'tan'
                           generations=10, stopping_criteria=0.01,
                           p_crossover=0.7, p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05, p_point_mutation=0.1,
                           max_samples=0.9, verbose=1, metric='mean absolute error',
                           parsimony_coefficient=0.01, random_state=0, const_range=(-1, 1))

        # Fit on the current training subset
        est_gp.fit(random_train_data[:, 0].reshape(-1, 1), random_train_data[:, 1])
        # Report the fitted expression
        print('Fitted program : ', est_gp._program)
        result.append(str(est_gp._program))
        # Predictions for all training inputs
        y_pred = est_gp.predict(X_train.reshape(-1, 1))
        # Predictions for the test inputs
        ytest_pred = est_gp.predict(X_test.reshape(-1, 1))
        # Predictions for the points used in training (unused)
        #ytrain_pred = est_gp.predict(random_train_data[:, 0].reshape(-1, 1))
        # Test-set R^2
        score = est_gp.score(X_test.reshape(-1, 1), y_test)
        test_score.append(score)
        # Test-set mean squared error
        mse = mean_squared_error(y_test, ytest_pred)
        test_mse.append(mse)

        # Absolute residuals between all training targets and predictions
        diff = abs(y_pred - y_train)
        # Keep the pure_count points with the smallest residuals as the
        # next round's training data
        flag = np.where(diff < sorted(diff)[pure_count], 1, 0)
        temp_data = train_data[flag == 1]
        # Stop if the new training set is identical to the previous one
        if np.array_equal(temp_data, random_train_data):
            break
        else:
            # New training data
            random_train_data = train_data[flag == 1]

        count += 1
        print('MSE : {0} , R^2 : {1} '.format(test_mse[-1], test_score[-1]))
        
    ytest_pred = est_gp.predict(X_test.reshape(-1,1))
    ytest_score = est_gp.score(X_test.reshape(-1,1),y_test)
    ytest_mse = mean_squared_error(ytest_pred,y_test)
    
    # Plot the target function
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)
    plt.plot(x, y_truth, label=target_func)
    plt.legend(loc='best', fontsize=18)

    # Plot the training data
    plt.scatter(X_train, y_noise, label='NoisyData', alpha=0.9)
    plt.legend(loc='best', fontsize=18)

    # Plot the fitted curve (sorted by x)
    data = np.c_[X_test, ytest_pred]
    data = data[np.lexsort(data[:, ::-1].T)]
    plt.plot(data[:, 0], data[:, 1], label='RCGP : ' + str(est_gp._program))

    fmt = '$R^2 =\/ {0:.6f}$ , $MSE =\/ {1:.6f}$'.format(ytest_score, ytest_mse)
    plt.title(fmt, fontsize=20)
    plt.legend(loc='best', fontsize=16)
    print(result)
    print('mse: ', test_mse)
    print('R^2: ', test_score)
    print()
    print("R^2 : %.6f" % test_score[-1])
    print("MSE : %.6f" % test_mse[-1])
    #print(est_gp.score(X_test.reshape(-1, 1), y_test))
    return data_list
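Since ransac() shares train()'s signature, the hypothetical driver sketched after train() above can exercise it unchanged, e.g. data_list = ransac(x, y_truth, X_train, y_train, X_test, y_test, '$x^2 + x$', noise_rate, noise_level).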
warnings.filterwarnings("ignore")

def is_less_than_zero(x):
    result = (x < 0)
    return result.astype(int)

def is_greater_than_or_equal_to_zero(x):
    result = (x >= 0)
    return result.astype(int)

is_lt_zero = make_function(is_less_than_zero, "is_lt_zero", arity=1)
is_gte_zero = make_function(is_greater_than_or_equal_to_zero, "is_gte_zero", arity=1)

function_set = [is_lt_zero, is_gte_zero, "mul", "add", "neg"]

X = np.arange(-10, 11).reshape(-1, 1)
y = np.abs(X).reshape(-1, 1)

X_train, X_test, y_train, y_test = train_test_split(X.tolist(), y.tolist())

my_abs_gp = SymbolicRegressor(function_set=function_set,
                              init_method="grow",
                              parsimony_coefficient=0.0625,
                              verbose=True)

my_abs_gp.fit(X_train, y_train)

print(my_abs_gp.score(X_test, y_test))
print(my_abs_gp._program)
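With this function set, the target abs(x) has the exact representation is_gte_zero(x)*x + is_lt_zero(x)*(-x), which is what the parsimony pressure should steer the search toward. A quick sanity check of that identity (illustrative, not part of the original script):

import numpy as np

xs = np.linspace(-10, 10, 5)
manual_abs = (is_greater_than_or_equal_to_zero(xs) * xs
              + is_less_than_zero(xs) * -xs)
assert np.allclose(manual_abs, np.abs(xs))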
Example #15
        i += 1
        continue
    locations = line.split()
    if len(locations) == 2:
        x.append([float(locations[0])])
        y.append(float(locations[1]))
    else:
        continue
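(The loop above is truncated at the top; a plausible reading of the complete parsing pass, with the file name assumed, is:)

x, y = [], []
with open('data.txt') as fh:  # file name is an assumption
    for line in fh:
        locations = line.split()
        if len(locations) == 2:
            x.append([float(locations[0])])
            y.append(float(locations[1]))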

est_gp = SymbolicRegressor(population_size=5000,
                           generations=15,
                           stopping_criteria=0.01,
                           p_crossover=0.7,
                           p_subtree_mutation=0.1,
                           p_hoist_mutation=0.08,
                           p_point_mutation=0.1,
                           max_samples=0.9,
                           verbose=1,
                           parsimony_coefficient=0.01,
                           random_state=50)

est_gp.fit(x, y)
print("Accuracy: " + str(est_gp.score(x, y) * 100) + "%")

print("Function: " + str(est_gp))

# noinspection PyProtectedMember
# graph = pydotplus.graphviz.graph_from_dot_data(est_gp._program.export_graphviz())
# Image(graph.create_png())
# graph.write_png("dtree.png")
Example #16
                           init_depth=(6, 13),
                           max_samples=0.4,
                           verbose=1,
                           n_jobs=-1,
                           metric='rmse',
                           parsimony_coefficient=0.0005,
                           random_state=1234)

if (regressor >= 3):
    gp.fit(train_x, train_y)
    predict_y = gp.predict(test_x)
    predict_y[predict_y < 0] = 0  # only positive values

    print('\nDetails about the results using Genetic Programming\n')
    print(gp._program)
    print('R2(max)    = ', gp.score(train_x, train_y))

    # summary of the results
    print('Raw fitness = ', gp._program.raw_fitness_)
    #print('Fitness     = ',gp._program.fitness_)
    print('OOB fitness = ', gp._program.oob_fitness_)
    print('Depth       = ', gp._program.depth_)
    print('Length      = ', gp._program.length_, '\n')
    '''
    Comments:
    raw_fitness_ : The raw fitness of the individual program.
    fitness_     : The penalized fitness of the individual program.
    oob_fitness_ : The out-of-bag raw fitness of the individual program for the held-out samples. 
                     Only present when sub-sampling was used in the estimator by 
                     specifying max_samples < 1.0.
    depth_       : The maximum depth of the program tree.
    length_      : The number of functions and terminals in the program.
    '''
Example #17
    # data = np.loadtxt('mydata2.txt')
    # X_train = data[:, 0:8]
    # Y_train = data[:, 8]
    #
    # X_test = X_train
    # Y_test = Y_train

    random_engine = check_random_state(0)
    X_train = random_engine.uniform(-1, 1, 10000).reshape(-1, 1)
    Y_train = np.sinh(X_train)
    X_test = random_engine.uniform(-1, 1, 10000).reshape(-1, 1)
    Y_test = np.sinh(X_test)

    est_gp = SymbolicRegressor(population_size=5000, generations=1000,
                               stopping_criteria=0.01, p_crossover=0.6,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.1, p_point_mutation=0.1,
                               max_samples=0.9, verbose=1,
                               parsimony_coefficient=0.01, n_jobs=1,
                               function_set=('add', 'mul', 'max'))

    est_gp.fit(X_train, Y_train)
    print(est_gp)
    with open('result.txt', 'w') as f:
        f.write(str(est_gp))

    score_train = est_gp.score(X_train, Y_train)
    score_test = est_gp.score(X_test, Y_test)
    print(score_train, score_test)

                           comparison=True,
                           transformer=True,
                           p_crossover=0.7,
                           p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05,
                           p_point_mutation=0.1,
                           max_samples=0.9,
                           verbose=1,
                           const_range=(-20.0, 20.0),
                           parsimony_coefficient=0.01,
                           random_state=1)

est_gp.fit(da_train, tar_train)

print(est_gp._program)
score_gp = est_gp.score(da_test, tar_test)
print(score_gp)
p = est_gp.predict(da)
print(r2_score(tar, p))
print(explained_variance_score(tar, p))
print(mean_squared_error(tar, p))

# Decision tree
est_dt = DecisionTreeRegressor()
est_dt = est_dt.fit(da_train, tar_train)
p1 = est_dt.predict(da)
print(est_dt.score(da_test, tar_test))
print(r2_score(tar, p1))
print(explained_variance_score(tar, p1))
print(mean_squared_error(tar[x:], p1[x:]))
Example #19
def test_symbolic_regressor():
    """Check that SymbolicRegressor example works"""

    rng = check_random_state(0)
    X_train = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_train = X_train[:, 0] ** 2 - X_train[:, 1] ** 2 + X_train[:, 1] - 1
    X_test = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_test = X_test[:, 0] ** 2 - X_test[:, 1] ** 2 + X_test[:, 1] - 1

    est_gp = SymbolicRegressor(population_size=5000, generations=20,
                               stopping_criteria=0.01, p_crossover=0.7,
                               p_subtree_mutation=0.1, p_hoist_mutation=0.05,
                               p_point_mutation=0.1, max_samples=0.9,
                               parsimony_coefficient=0.01, random_state=0)
    est_gp.fit(X_train, y_train)

    assert_equal(len(est_gp._programs), 7)
    expected = 'sub(add(-0.999, X1), mul(sub(X1, X0), add(X0, X1)))'
    assert_equal(est_gp.__str__(), expected)
    assert_almost_equal(est_gp.score(X_test, y_test), 0.99999, decimal=5)
    dot_data = est_gp._program.export_graphviz()
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="add", fillcolor="#136ed4"] '
                ';\n2 [label="-0.999", fillcolor="#60a6f6"] ;\n3 [label="X1", '
                'fillcolor="#60a6f6"] ;\n1 -> 3 ;\n1 -> 2 ;\n4 [label="mul", '
                'fillcolor="#136ed4"] ;\n5 [label="sub", fillcolor="#136ed4"] '
                ';\n6 [label="X1", fillcolor="#60a6f6"] ;\n7 [label="X0", '
                'fillcolor="#60a6f6"] ;\n5 -> 7 ;\n5 -> 6 ;\n8 [label="add", '
                'fillcolor="#136ed4"] ;\n9 [label="X0", fillcolor="#60a6f6"] '
                ';\n10 [label="X1", fillcolor="#60a6f6"] ;\n8 -> 10 ;\n8 -> 9 '
                ';\n4 -> 8 ;\n4 -> 5 ;\n0 -> 4 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
    assert_equal(est_gp._program.parents, {'method': 'Crossover',
                                           'parent_idx': 1555,
                                           'parent_nodes': range(1, 4),
                                           'donor_idx': 78,
                                           'donor_nodes': []})
    idx = est_gp._program.parents['donor_idx']
    fade_nodes = est_gp._program.parents['donor_nodes']
    assert_equal(est_gp._programs[-2][idx].__str__(), 'add(-0.999, X1)')
    assert_almost_equal(est_gp._programs[-2][idx].fitness_, 0.351803319075)
    dot_data = est_gp._programs[-2][idx].export_graphviz(fade_nodes=fade_nodes)
    expected = ('digraph program {\nnode [style=filled]\n0 [label="add", '
                'fillcolor="#136ed4"] ;\n1 [label="-0.999", '
                'fillcolor="#60a6f6"] ;\n2 [label="X1", fillcolor="#60a6f6"] '
                ';\n0 -> 2 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
    idx = est_gp._program.parents['parent_idx']
    fade_nodes = est_gp._program.parents['parent_nodes']
    assert_equal(est_gp._programs[-2][idx].__str__(),
                 'sub(sub(X1, 0.939), mul(sub(X1, X0), add(X0, X1)))')
    assert_almost_equal(est_gp._programs[-2][idx].fitness_, 0.17080204042)
    dot_data = est_gp._programs[-2][idx].export_graphviz(fade_nodes=fade_nodes)
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="sub", fillcolor="#cecece"] '
                ';\n2 [label="X1", fillcolor="#cecece"] ;\n3 [label="0.939", '
                'fillcolor="#cecece"] ;\n1 -> 3 ;\n1 -> 2 ;\n4 [label="mul", '
                'fillcolor="#136ed4"] ;\n5 [label="sub", fillcolor="#136ed4"] '
                ';\n6 [label="X1", fillcolor="#60a6f6"] ;\n7 [label="X0", '
                'fillcolor="#60a6f6"] ;\n5 -> 7 ;\n5 -> 6 ;\n8 [label="add", '
                'fillcolor="#136ed4"] ;\n9 [label="X0", fillcolor="#60a6f6"] '
                ';\n10 [label="X1", fillcolor="#60a6f6"] ;\n8 -> 10 ;\n8 -> 9 '
                ';\n4 -> 8 ;\n4 -> 5 ;\n0 -> 4 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
plt.plot(timeline, sample_data)
plt.show()

# Train/Test separation
X_train, X_test, y_train, y_test = train_test_split(timeline, sample_data, test_size=0.2)

# Apply regressor
reg = SymbolicRegressor(population_size=2000,
                        generations=20, stopping_criteria=0.01,
                        p_crossover=0.7, p_subtree_mutation=0.1,
                        p_hoist_mutation=0.05, p_point_mutation=0.1,
                        max_samples=0.9, verbose=1,
                        parsimony_coefficient=0.01, random_state=0,
                        function_set=('add', 'sub', 'mul', 'div', 'sin', 'cos', 'tan', 'abs', 'log'))
reg.fit(X_train.reshape(-1, 1), y_train)
score = reg.score(X_test.reshape(-1, 1), y_test)
print("Function Regressed:", reg._program, " | Score:", score)

# Create validation data with fault and labels
validation_timeline, validation_data, real_labels = gen_validation_data(parametric_function,
                                                                        timeline_end,
                                                                        validation_max_end,
                                                                        points_count,
                                                                        noise_std,
                                                                        validation_fault_count)

# Validate fault detection
reconstruction = reg.predict(validation_timeline.reshape(-1, 1))
res = minimize(calc_labels_fitness, np.array([initial_fault_limiar]), method='Nelder-Mead')
rec_labels = calc_labels(res.x[0], reconstruction, validation_data)
print(confusion_matrix(real_labels, rec_labels))
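calc_labels and calc_labels_fitness are defined elsewhere in this script; a plausible shape for the labelling step, flagging points whose deviation from the GP reconstruction exceeds a threshold (an assumption, not the original code):

import numpy as np

def calc_labels(threshold, reconstruction, data):
    # 1 = fault (residual above threshold), 0 = nominal
    return (np.abs(data - reconstruction) > threshold).astype(int)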
Example #21
                         p_subtree_mutation=0.1,
                         p_hoist_mutation=0.05,
                         p_point_mutation=0.1,
                         max_samples=0.9,
                         verbose=1,
                         parsimony_coefficient=0.01,
                         random_state=0)

genp.fit(x_train, y_train)

print(genp._program)

pre = genp.predict(x_val)

print('accuracy on training set\n')
print(genp.score(x_train, y_train))

print('accuracy on validation set\n')
print(genp.score(x_val, y_val))
#%%
"""
#%
df2 = cp.deepcopy(df)
df2['Survived'] = df2['Survived'].astype('str').replace('0','-1').astype('int64')
df2.corr()

df.corr().loc['Survived'].plot.bar()

df.plot.scatter(x= 'Pclass', y = 'Survived')
df.plot.scatter(x= 'Fare', y = 'Survived')
Example #22
                           stopping_criteria=0.01,
                           p_crossover=0.7,
                           p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05,
                           p_point_mutation=0.1,
                           max_samples=0.9,
                           verbose=1,
                           parsimony_coefficient=0.01,
                           random_state=0,
                           function_set=('add', 'sub', 'mul', 'div', 'sqrt',
                                         'log', 'abs', 'neg', 'inv', 'max',
                                         'min', 'sin', 'cos', 'tan'))
est_gp.fit(trainSet, z_train)
print(est_gp._program)

score_gp = est_gp.score(testSet, z_test)

#score_gp = est_gp.mean_absolute_error(testSet, z_test)

print(score_gp)
#19 generations required
#min(add(add(log(add(inv(div(sin(X0), mul(X0, 0.952))),neg(abs(X0)))), neg(inv(div(sin(X1), mul(X0, 0.952))))), min(add(log(add(min(mul(-0.020, X1), cos(X1)), neg(add(log(add(min(inv(div(sin(X1), mul(X0, 0.952))), cos(X1)), div(sin(X1), add(log(add(min(mul(-0.020, X1), cos(X1)), neg(add(log(cos(X1)), neg(0.952))))), add(tan(tan(sin(X1))), neg(inv(div(sin(X1), neg(div(X1, 0.794)))))))))), neg(0.952))))), add(tan(tan(sin(X1))), neg(inv(div(sin(X1), neg(div(X1, 0.794))))))), div(neg(cos(tan(X1))), inv(mul(add(X1, X1), log(X1)))))), div(neg(cos(inv(mul(add(X1, X1), log(X1))))), inv(mul(add(X1, X1), log(X1)))))

z_gp = est_gp.predict(np.c_[x.ravel(), y.ravel()]).reshape(x.shape)
#print(z_gp)
ax = plt.figure().add_subplot(projection='3d')
ax.set_xlim(-10, 10)
ax.set_ylim(-10, 10)
#surf = ax.plot_trisurf(x_test, y_test, z_gp, color='green')
surf = ax.plot_surface(x,
                       y,
def gplearn_procedure(equation_id,
                      no_samples=1000,
                      input_range=(-1, 1),
                      save_path=None,
                      save=True,
                      load=True,
                      func_set=[
                          'add', 'sub', 'mul', 'div', 'log', 'sqrt', 'cos',
                          'tan', 'sin', 'pow', 'exp'
                      ],
                      verbose=1):
    """
    Uses gplearn to attempt to predict the equation form of 'equation_id'
    Renders a graphviz image to images/gplearn/
    returns predicted equation, R^2 score and time taken
    
    Parameters
    ----------
    equation_id : string
        The ID of an equation in the dataset. Must be a valid one

    no_samples : int 
        The number of samples you want fed in to the algorithm

    input_range: tuple(float, float)
        The minimum and maximum values of all input parameters

    save_path: string path
        The path to where you wish to save this dataframe

    save: boolean
        Saves file to save_path iff True

    load: boolean
        If True, looks for the file in save_path and loads it preemptively if it is there

    func_set : list
        List of strings i.e names of functions to include / operations to consider
        current options include
        ‘add’ : addition, arity=2.
        ‘sub’ : subtraction, arity=2.
        ‘mul’ : multiplication, arity=2.
        ‘div’ : protected division where a denominator near-zero returns 1., arity=2.
        ‘sqrt’ : protected square root where the absolute value of the argument is used, arity=1.
        ‘log’ : protected log where the absolute value of the argument is used and a near-zero argument returns 0., arity=1.
        ‘abs’ : absolute value, arity=1.
        ‘neg’ : negative, arity=1.
        ‘inv’ : protected inverse where a near-zero argument returns 0., arity=1.
        ‘max’ : maximum, arity=2.
        ‘min’ : minimum, arity=2.
        ‘sin’ : sine (radians), arity=1.
        ‘cos’ : cosine (radians), arity=1.
        ‘tan’ : tangent (radians), arity=1.

        'exp' : exponential (self defined), arity=1
        'pow' : power (self defined), arity=2

    verbose : int
        controls how much is printed, 0 is quietest

    Returns
    -------
    string, float, float
    """
    try:
        df = create_dataset(equation_id,
                            no_samples=no_samples,
                            input_range=input_range,
                            save_path=save_path,
                            save=save,
                            load=load).dropna()
        X = df.drop('target', axis=1)
        y = df['target']
    except Exception:
        traceback.print_exc()
        print(f"Error on equation {equation_id} skipping")
        return '', 0, 0
    no_samples = min(no_samples, len(y))

    default_func_set = ('add', 'sub', 'mul', 'div', 'log', 'sqrt', 'cos',
                        'tan', 'sin', 'abs', 'neg', 'inv', 'max', 'min')
    final_func_set = []
    for func in func_set:
        if func in default_func_set:
            final_func_set.append(func)
        else:
            if func == "pow":
                final_func_set.append(make_function(power, func, 2))
            elif func == "exp":
                final_func_set.append(make_function(exponent, func, 1))
            elif func == "pi":
                final_func_set.append(make_function(pi, func, 0))
            else:
                warnings.warn(
                    f"{func} is an unrecognized function, skipping it")
                pass

    est_gp = SymbolicRegressor(population_size=5000,
                               generations=10,
                               stopping_criteria=0.01,
                               p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1,
                               max_samples=0.9,
                               function_set=final_func_set,
                               verbose=verbose,
                               parsimony_coefficient=0.01,
                               random_state=0)

    start = time.time()
    est_gp.fit(X[:no_samples], y[:no_samples])
    end = time.time()
    #print(est_gp._program)
    dot_data = est_gp._program.export_graphviz()
    graph = graphviz.Source(dot_data)
    graph.render(f'images/gplearn/{equation_id}_estimate',
                 format='png',
                 cleanup=True)
    return est_gp._program, est_gp.score(X, y), end - start
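power, exponent and pi are user-defined primitives referenced above but not shown. A sketch of protected versions that preserve gplearn's closure property (the exact guards in the original are unknown; the clipping bounds below are assumptions, and an arity-0 pi is omitted):

import numpy as np

def power(x1, x2):
    # Protected power: work on |x1| and clip the exponent so the result
    # stays finite, mapping any residual nan/inf to 0.
    with np.errstate(over='ignore', invalid='ignore'):
        result = np.power(np.abs(x1), np.clip(x2, -5.0, 5.0))
    return np.nan_to_num(result, nan=0.0, posinf=0.0, neginf=0.0)

def exponent(x1):
    # Protected exponential: clip the argument so np.exp cannot overflow.
    return np.exp(np.clip(x1, -50.0, 50.0))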