Example #1
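The short aliases used in this method (lr, lassor, lassocvr, rr, rcvr, knnr, sgdr, dtr, rfr, abr, gbr, svr) are never imported in the snippet. A plausible import block, assuming they map to the usual scikit-learn estimators and that display comes from IPython, would be:

# Assumed imports for the aliases below (not part of the original snippet)
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import (LinearRegression as lr, Lasso as lassor,
                                  LassoCV as lassocvr, Ridge as rr,
                                  RidgeCV as rcvr, SGDRegressor as sgdr)
from sklearn.neighbors import KNeighborsRegressor as knnr
from sklearn.tree import DecisionTreeRegressor as dtr
from sklearn.ensemble import (RandomForestRegressor as rfr,
                              AdaBoostRegressor as abr,
                              GradientBoostingRegressor as gbr)
from sklearn.svm import SVR as svr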
    def regression(self, metric="neg_root_mean_squared_error", folds=10, alphas=[], graph=False):
        size = 1.3 * self.report_width // 10

        models = {}
        models["Linear regressor"]                  = lr()
        models["Lasso regressor"]                   = lassor()
        models["Lasso CV regressor"]                = lassocvr()
        models["Ridge regressor"]                   = rr(alpha=0, normalize=True)
        models["Ridge CV regressor"]                = rcvr(alphas = alphas)
        models["K nearest neighbors regressor K2u"] = knnr(n_neighbors=2, weights='uniform')
        models["K nearest neighbors regressor K2d"] = knnr(n_neighbors=2, weights='distance')
        models["K nearest neighbors regressor K5"]  = knnr(n_neighbors=5)
        models["K nearest neighbors regressor K10"] = knnr(n_neighbors=10)
        models["SGD regressor"]                     = sgdr(max_iter=10000, warm_start=True)
        models["Decision tree regressor"]           = dtr()
        models["Decision tree regressor D3"]        = dtr(max_depth=3)
        models["Random forest regressor"]           = rfr()
        models["Ada boost regressor"]               = abr()
        models["Gradient boost regressor"]          = gbr()
        models["Support vector regressor"]          = svr()
        self.models = models

        print('\n')
        print(self.report_width * '*', '\n*')
        print('* REGRESSION RESULTS - BEFORE PARAMETERS BOOSTING \n*')
        #kf = StratifiedKFold(n_splits=folds, shuffle=True)
        kf = KFold(n_splits=folds)
        results = []
        names = []
        for model_name in models:
            cv_scores = -1 * cross_val_score(models[model_name], self.Xt_train, self.yt_train.values.ravel(), cv=kf, scoring=metric)  
            results.append(cv_scores)
            names.append(model_name)
        print(self.report_width * '*', '')
        report = pd.DataFrame({'Regressor': names, 'Score': results})
        report['Score (avg)'] = report.Score.apply(lambda x: x.mean())
        report['Score (std)'] = report.Score.apply(lambda x: x.std())
        report['Score (VC)'] = 100 * report['Score (std)'] / report['Score (avg)']
        report.sort_values(by='Score (avg)', inplace=True)
        report.drop('Score', axis=1, inplace=True)
        display(report)
        print('\n')
        if graph:
            fig, ax = plt.subplots(figsize=(size, 0.5 * size))
            plt.title('Regressor Comparison')
            #ax = fig.add_subplot(111)
            plt.boxplot(results)
            ax.set_xticklabels(names)
            plt.xticks(rotation=45)
            plt.subplots_adjust(hspace=0.0)
            plt.show()             
        return None
def best_model(xt, xv, yt, yv):
	models = []

	name_dt = "DecisionTreeRegressor"
	model_dt = dtr(random_state=1) # decision tree
	model_dt.fit(xt, yt)
	models.append({'name': name_dt, 'model': model_dt, 'mae': get_mae(model_dt, xv, yv)})

	name_rf = "RandomForestRegressor"
	model_rf = rfr(random_state=1) # random forest
	model_rf.fit(xt, yt)
	models.append({'name': name_rf, 'model': model_rf, 'mae': get_mae(model_rf, xv, yv)})

	name_xgb = "XGBRegressor"
	model_xgb = xgb(random_state=1, n_estimators=10000, learning_rate=0.01) # xgboost
	model_xgb.fit(xt, yt, early_stopping_rounds=10, eval_set=[(xv, yv)], verbose=False)
	models.append({'name': name_xgb, 'model': model_xgb, 'mae': get_mae(model_xgb, xv, yv)})
	
	print("\n")
	for m in models:
		print("Model {} has MAE {}".format(m.get('name'), m.get('mae')))

	min_mae = min(i['mae'] for i in models)
	best_model = [m for m in models if m.get('mae') == min_mae]
	print("\nBest model pick: ", best_model[0].get('name'))
	print("\n")

	return best_model[0].get('model')
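best_model depends on a get_mae helper that is not shown in this example. A minimal sketch, assuming it simply returns the validation mean absolute error, could be:

# Hypothetical helper assumed by best_model (not part of the original snippet)
from sklearn.metrics import mean_absolute_error

def get_mae(model, xv, yv):
    # score a fitted model on the validation set
    return mean_absolute_error(yv, model.predict(xv))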
 def __init__(self, pathToData):
     self.dataFilePath = pathToData
     self.algoname = 'Boosting'
     self.datasetName = 'Abalone'
     self.baseEstimater = dtr()
     self.classifier = abr(base_estimator=self.baseEstimater)
     self.cv = 5
Example #4
def regressor(file, X, Y, x, y):
    param = []
    acc = []
    criterion = ['mse', 'friedman_mse', 'mae']
    for i in it.product(criterion, splitter, max_depth, min_samples_split,
                        min_samples_leaf, min_weight_fraction_leaf,
                        max_features, random_state, max_leaf_nodes, presort):
        # map each combination onto keyword arguments; passing the raw list
        # positionally would silently shift values onto the wrong parameters
        keys = ('criterion', 'splitter', 'max_depth', 'min_samples_split',
                'min_samples_leaf', 'min_weight_fraction_leaf',
                'max_features', 'random_state', 'max_leaf_nodes', 'presort')
        dtree = dtr(**dict(zip(keys, i)))
        dtree.fit(X, Y)
        acc.append(dtree.score(x, y))
        param.append([*i])
    _results(file, acc, param)
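The grid lists (splitter, max_depth, min_samples_split, and so on) and the _results helper used above are defined elsewhere in the original project. Purely as an illustration of what they might look like (all values below are assumptions), a compatible setup is:

# Assumed parameter grids and reporting helper (placeholders, not the original values)
import itertools as it
from sklearn.tree import DecisionTreeRegressor as dtr

splitter = ['best', 'random']
max_depth = [None, 3, 5, 10]
min_samples_split = [2, 5]
min_samples_leaf = [1, 2]
min_weight_fraction_leaf = [0.0]
max_features = [None, 'sqrt']
random_state = [0]
max_leaf_nodes = [None, 20]
presort = [False]

def _results(file, acc, param):
    # write one line per parameter combination, best score first
    with open(file, 'w') as f:
        for score, p in sorted(zip(acc, param), key=lambda t: t[0], reverse=True):
            f.write('{:.4f}\t{}\n'.format(score, p))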
Example #5
model = joblib.load('gbdt_125.model')
model = joblib.load('gbdt_132.model')
model = joblib.load('gbdt_151.model')
model = joblib.load('gbdt_170.model')
model.learning_rate = 0.005
model.n_estimators = 3000
model.max_leaf_nodes = 60
model.max_depth = 10
model.subsample = 0.16
model.max_features = 0.04
#lr=joblib.load('gbdt_97.model')
#lr=model
#model=GradientBoostingRegressor(init=lr,loss='ls',n_estimators=50,\
#learning_rate=0.01,max_depth=10,min_samples_leaf=20,\
#max_features=0.05,subsample=0.2,max_leaf_nodes=60)
bm = dtr(max_depth=6, min_samples_leaf=2, max_leaf_nodes=60, splitter='random')
#model=BaggingRegressor(base_estimator=bm,n_estimators=2000,bootstrap=True,\
#bootstrap_features=1,max_samples=0.16,max_features=0.05)
model = AdaBoostRegressor(n_estimators=300, learning_rate=0.03,
                          loss='square', base_estimator=bm)

model.fit(xfit, yfit.flatten())
probs = 1 * model.predict(xval) + 1 * Yam.flatten()
fpr, tpr, thresholds = roc_curve(yval, probs)
roc_auc = auc(fpr, tpr)
print(roc_auc)

probs = f_predict1(xval, Yam)
ytest = f_predict1(x0t, ytest0)

probs = 1 * Yam.flatten()
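f_predict1 and Yam come from elsewhere in the original script. Judging from the expression model.predict(xval) + Yam.flatten() above, the helper presumably adds the model's output to an externally supplied baseline; a guessed sketch (an assumption, not the original code):

# Hypothetical helper consistent with the usage above
def f_predict1(x, baseline):
    # model output plus the supplied baseline prediction
    return model.predict(x) + baseline.flatten()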
Example #6
# rng.rand(10)    # generate 10 random numbers
# rng.rand(2, 3)  # generate a 2-row, 3-column array

# generate the x-axis data:
# we need 80 random numbers, but the regression tree only accepts a 2-D input,
# so the shape is (80, 1). rand() returns values in [0, 1); multiplying by 5
# gives the [0, 5) range we want.
X = np.sort(5 * rng.rand(80, 1), axis=0)
# generate the y-axis data:
# feed X through np.sin to get a clean sine curve. Unlike X, the target must be
# a 1-D array, so ravel() is used to flatten it.
Y = np.sin(X).ravel()
# add noise to y, since a perfect sine curve never occurs in practice.
# Y[::5] takes every 5th sample; 3 * (0.5 - rand()) turns numpy's [0, 1)
# values into noise in (-1.5, 1.5] to amplify its effect.
Y[::5] += 3 * (0.5 - rng.rand(16))

# fit the models on the data

dtr1 = dtr(max_depth=2)  # maximum tree depth 2
dtr2 = dtr(max_depth=5)  # maximum tree depth 5
dtr1.fit(X, Y)  # train model 1
dtr2.fit(X, Y)  # train model 2

# generate test data, again with numpy: arange(start, stop, step).
# np.newaxis adds a dimension because the estimator only accepts 2-D arrays for X.
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]

# run the trained models on the test data; predict() returns the estimates
Y1_test = dtr1.predict(X_test)
Y2_test = dtr2.predict(X_test)

# plot the generated data:
# s=20 sets the scatter marker size, edgecolor="black" the marker outline,
# c="darkorange" the marker colour, label="data" the legend entry for the points;
# label="max_depth=2" names the fitted line, linewidth=2 sets its width.
plt.figure()  # prepare the figure
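The snippet stops right after plt.figure(), but the comments above describe the rest of the classic plot. A reconstruction based on those comments (the line colours are guesses):

# Reconstructed plotting code (an assumption based on the comments above)
plt.scatter(X, Y, s=20, edgecolor="black", c="darkorange", label="data")
plt.plot(X_test, Y1_test, color="cornflowerblue", label="max_depth=2", linewidth=2)
plt.plot(X_test, Y2_test, color="yellowgreen", label="max_depth=5", linewidth=2)
plt.xlabel("data")
plt.ylabel("target")
plt.title("Decision Tree Regression")
plt.legend()
plt.show()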
Example #7
#======= partition the data ===================================================================================================#
#   Partitioning the data this way lets us evaluate how the model might perform on data it has never seen before.
#   If we train the model on all of the data, it is difficult to tell whether overfitting has taken place.
#==============================================================================================================================#
# We also state what fraction of the data we want to hold out as the test set.
# In this example, about 33% of the data is devoted to the hold-out set.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    data['SalePrice'],
                                                    random_state=42,
                                                    test_size=.33)

# fitting a decision tree regression model...
#==============================================================================================================================#
print('fitting a decision tree regression model...')
DTR_1 = dtr(max_depth=None)  # declare the regression model form. Let the depth be default.
# DTR_1.fit(X,Y) # fit the training data
scores_dtr = cross_val_score(
    DTR_1, X_train, y_train, cv=10,
    scoring="explained_variance")  # 10-fold cross validation
print("scores for k=10 fold validation:", scores_dtr)
print("Est. explained variance: %0.2f (+/- %0.2f)" %
      (scores_dtr.mean(), scores_dtr.std() * 2))
#==============================================================================================================================#
sorted_scores = Feature_Ranking(X_train, y_train)
estimators = [10, 20, 30, 40, 50, 60, 70, 80]
# top 15...
mean_rfrs, std_rfrs_upper, std_rfrs_lower = getModel(X_train, y_train,
                                                     sorted_scores, 15,
                                                     estimators)
plotResults(mean_rfrs, std_rfrs_upper, std_rfrs_lower, 15, estimators)
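Feature_Ranking, getModel and plotResults are project-specific helpers that are not included in this example. As a rough, assumption-laden sketch, getModel might cross-validate a random forest on the top-N ranked features for each estimator count and return the mean score with a ±1 standard deviation band:

# Rough sketch of the assumed helper (not the original implementation);
# sorted_scores is taken to be a list of (feature_name, score) pairs
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score

def getModel(X, y, sorted_scores, n_features, estimators):
    top = [name for name, _ in sorted_scores[:n_features]]
    means, upper, lower = [], [], []
    for n in estimators:
        rf = RandomForestRegressor(n_estimators=n, random_state=42)
        scores = cross_val_score(rf, X[top], y, cv=10, scoring="explained_variance")
        means.append(scores.mean())
        upper.append(scores.mean() + scores.std())
        lower.append(scores.mean() - scores.std())
    return means, upper, lower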
    def regression(self, metric, folds=10, alphas=[], printt=True, graph=False):
        size = self.graph_width

        # significant model setup differences should be listed as different models
        models = {}
        models["Linear regressor"]                  = lr()
        models["Lasso regressor"]                   = lassor()
        models["Lasso CV regressor"]                = lassocvr()
        models["Ridge regressor"]                   = rr(alpha=0, normalize=True)
        models["Ridge CV regressor"]                = rcvr(alphas = alphas)
        models["Elastic net regressor"]             = enr()
        models["K nearest neighbors regressor K2u"] = knnr(n_neighbors=2, weights='uniform')
        models["K nearest neighbors regressor K2d"] = knnr(n_neighbors=2, weights='distance')
        models["K nearest neighbors regressor K5"]  = knnr(n_neighbors=5)
        models["K nearest neighbors regressor K10"] = knnr(n_neighbors=10)
        models["SGD regressor"]                     = sgdr(max_iter=10000, warm_start=True)
        models["Decision tree regressor"]           = dtr()
        models["Decision tree regressor D3"]        = dtr(max_depth=3)
        models["Random forest regressor"]           = rfr()
        models["Ada boost regressor"]               = abr()
        models["Gradient boost regressor"]          = gbr()
        models["Support vector regressor RBF"]      = svr()
        models["Support vector regressor Linear"]   = svr(kernel='linear')
        models["Support vector regressor Poly"]     = svr(kernel='poly')
        self.models = models

        kf = KFold(n_splits=folds, shuffle=True)
        results = []
        names = []
        et = []
        for model_name in models:
            start = time.time()
            cv_scores = -1 * cross_val_score(models[model_name], self.Xt_train, self.yt_train, cv=kf, scoring=metric)  
            results.append(cv_scores)
            names.append(model_name)
            et.append((time.time() - start))
        report = pd.DataFrame({'Model': names, 'Score': results, 'Elapsed Time': et})
        report['Score (avg)'] = report.Score.apply(lambda x: np.sqrt(x).mean())
        report['Score (std)'] = report.Score.apply(lambda x: np.sqrt(x).std())
        report['Score (VC)'] = 100 * report['Score (std)'] / report['Score (avg)']
        report.sort_values(by='Score (avg)', inplace=True)
        report.drop('Score', axis=1, inplace=True)
        report.reset_index(inplace=True, drop=True)
        self.report_performance = report
        
        if printt:
            print('\n')
            print(self.report_width * '*', '\n*')
            print('* REGRESSION RESULTS - BEFORE PARAMETERS BOOSTING \n*')
            print(self.report_width * '*', '')
            print(report)
            print('\n')

        if graph:
            fig, ax = plt.subplots(figsize=(size, 0.5 * size))
            plt.title('Regressor Comparison')
            #ax = fig.add_subplot(111)
            plt.boxplot(results)
            ax.set_xticklabels(names)
            plt.xticks(rotation=45)
            plt.subplots_adjust(hspace=0.0, bottom=0.25)
            self.graphs_model.append(fig)
            plt.show()             
        return None
Example #9
litprop = literature_data['exponent']
X_train, X_test, y_train, y_test = train_test_split(litfeat, litprop, test_size=0.2, random_state=4)

#-------------------------------linear regression train and test------------------------------------------------------------------

linreg = lr(normalize=True)
linreg.fit(X_train, y_train)
linreg_pred = linreg.predict(X_test)

linreg_rmse = mean_squared_error(y_test, linreg_pred)
print('linreg MAE: ' + str(sum(abs(linreg_pred - y_test))/(len(y_test))))
print('linreg RMSE: ' + str(np.sqrt(linreg_rmse)))

#-------------------------------decision tree train and test------------------------------------------------------------------

dectree = dtr()
dectree.fit(X_train,y_train)
dectree_pred = dectree.predict(X_test)

dectree_rmse = mean_squared_error(y_test, dectree_pred)
print('dectree MAE: ' + str(sum(abs(dectree_pred - y_test))/(len(y_test))))
print('dectree RMSE: ' + str(np.sqrt(dectree_rmse)))

#-------------------------------random forest train and test---------------------------------------------------------------------

randomforestmodel = rfr()
randomforestmodel.fit(X_train, y_train)
rf_pred = randomforestmodel.predict(X_test)

rf_rmse = mean_squared_error(y_test, rf_pred)
print('rf MAE: ' + str(sum(abs(rf_pred - y_test))/(len(y_test))))
Example #10
print('\n')
print('--- start ---')
print('\n')

# get data
data = get_data(PATH)
y = data.CO2  # set the prediction target
X = data[FEATURES]

# split to validation and training data
train_X, val_X, train_y, val_y = tts(X, y, random_state=1)

print('validation MAEs')

# decision tree
model_dt = dtr(random_state=1)
model_dt.fit(train_X, train_y)
get_mae(model_dt, val_X, val_y)

# random forest
model_rf = rfr(random_state=1)
model_rf.fit(train_X, train_y)
get_mae(model_rf, val_X, val_y)

# xgboost
model_xgb = xgb(random_state=1, n_estimators=10000, learning_rate=0.01)
model_xgb.fit(train_X,
              train_y,
              early_stopping_rounds=10,
              eval_set=[(val_X, val_y)],
              verbose=False)
Example #11
# Splitting the dataset into the Training set and Test set
"""from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"""

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""

# Fitting the Decision Tree Regression to the dataset
from sklearn.tree import DecisionTreeRegressor as dtr

regressor = dtr(random_state=0)
regressor.fit(x, y)

# Predicting a new result
y_pred = regressor.predict([[6.5]])  # predict() expects a 2-D array, even for a single sample

# Visualising the Regression results (for higher resolution and smoother curve)
x_grid = np.arange(min(x), max(x), 0.01)
x_grid = x_grid.reshape((len(x_grid), 1))
plt.scatter(x, y, color='red')
plt.plot(x_grid, regressor.predict(x_grid), color='blue')
plt.title('Truth or Bluff (Decision Tree Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()
Example #12
 X = np.load('data/X_boston.npy')
 y = np.load('data/y_boston.npy')
 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
 regressors = [
     lr(),
     bay(),
     rr(alpha=.5, random_state=0),
     l(alpha=0.1, random_state=0),
     ll(),
     knn(),
     ard(),
     rfr(random_state=0, n_estimators=100),
     SVR(gamma='scale', kernel='rbf'),
     rcv(fit_intercept=False),
     en(random_state=0),
     dtr(random_state=0),
     ada(random_state=0),
     gbr(random_state=0)
 ]
 print('unscaled:', br)
 for reg in regressors:
     reg.fit(X_train, y_train)
     rmse, name = get_error(reg, X_test, y_test)
     name = reg.__class__.__name__
     print(name + '(rmse):', end=' ')
     print(rmse)
 print()
 print('scaled:', br)
 scaler = StandardScaler()
 X_train_std = scaler.fit_transform(X_train)
 X_test_std = scaler.transform(X_test)  # transform only; reuse the scaler fitted on the training data
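None of the aliases in this example (lr, bay, rr, l, ll, knn, ard, rfr, rcv, en, dtr, ada, gbr), nor get_error and br, are defined in the snippet. The block below is a guess at what they map to, plus a minimal get_error returning the test RMSE and the regressor's class name (all of it an assumption, not the original code):

 # Assumed imports and helpers for this example (not part of the original snippet)
 import numpy as np
 from sklearn.linear_model import (LinearRegression as lr, BayesianRidge as bay,
                                   Ridge as rr, Lasso as l, LassoLars as ll,
                                   ARDRegression as ard, RidgeCV as rcv,
                                   ElasticNet as en)
 from sklearn.neighbors import KNeighborsRegressor as knn
 from sklearn.ensemble import (RandomForestRegressor as rfr,
                               AdaBoostRegressor as ada,
                               GradientBoostingRegressor as gbr)
 from sklearn.tree import DecisionTreeRegressor as dtr
 from sklearn.svm import SVR
 from sklearn.preprocessing import StandardScaler
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import mean_squared_error

 br = '\n'  # assumed to be just a line-break separator used in the prints

 def get_error(reg, X_test, y_test):
     # RMSE of the fitted regressor on the held-out set
     rmse = np.sqrt(mean_squared_error(y_test, reg.predict(X_test)))
     return rmse, reg.__class__.__name__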
Example #13
    def regression(self, folds=10, printt=True, graph=False):
        size = self.graph_width
        X = self.X
        y = self.y
        safra_range = list(range(len(X.safra.unique())))
        
        models = {}
        models["Linear regressor"]                  = lr()
        models["Lasso CV regressor"]                = lassocvr()
        models["Ridge CV regressor"]                = rcvr()
        models["K nearest neighbors regressor K2u"] = knnr(n_neighbors=2, weights='uniform')
        models["K nearest neighbors regressor K2d"] = knnr(n_neighbors=2, weights='distance')
        models["K nearest neighbors regressor K5"]  = knnr(n_neighbors=5)
        models["K nearest neighbors regressor K10"] = knnr(n_neighbors=10)
        models["Decision tree regressor"]           = dtr()
        models["Decision tree regressor D3"]        = dtr(max_depth=3)
        models["Random forest regressor"]           = rfr()



        report = {"Model":[], "Score (avg)":[], "Score (std)":[], "Elapsed Time(s)":[]}
        for model_name in models:

            score_list = []
            time_list = []
            for i in range(folds):
                rand_ind = random.sample(safra_range,4)
                testX = X[X.safra.isin(rand_ind)]
                testy = y[y.index.isin(testX.index)]
                trainX = X[~X.safra.isin(rand_ind)]
                trainy = y[y.index.isin(trainX.index)]

                start = time.time() 
                model = models[model_name].fit(trainX, trainy)
                score_list.append(model.score(testX, testy))
                time_list.append(time.time()-start)
            
            report["Score (avg)"].append(np.mean(score_list))
            report["Score (std)"].append(np.std(score_list))
            report["Model"].append(model_name)
            report["Elapsed Time(s)"].append(np.mean(time_list))

        report = pd.DataFrame.from_dict(report)
        report.sort_values(by='Score (avg)', inplace=True)
        report.reset_index(inplace=True, drop=True)
        best = report[-1:].values.tolist()[0]
        self.reg = best

        if printt:
            print('REGRESSION RESULTS')
            print('     Best regression method: ', best[0])
            print('     Average score(R2): ', best[1])
            print('     Standard Deviation: ', best[2])
            print('     Elapsed Time(s): ', best[3], '\n')
            #display(report)
            
        if graph:
            model = models[best[0]].fit(trainX, trainy)
            self.pred = model.predict(testX)
            self.testy = testy

            fig, ax = plt.subplots()
            text = 'R2='+str(np.round(best[1],2))
            ax.scatter(testy, self.pred, color='g')
            ax.set_xlabel("True values")
            ax.set_ylabel("Predictions")
            ax.text(0.05, 0.95 , text, transform = ax.transAxes, verticalalignment= 'top', bbox={'boxstyle':'square','facecolor':'none','edgecolor':'black'})
            plt.show()

y = dt.PassengerId
dtf = ['Survived', 'Pclass', 'Age','SibSp', 'Fare']
x = dt[dtf]
x.describe()
x.head()


# In[ ]:



from sklearn.tree import DecisionTreeRegressor as dtr

dtm = dtr(random_state = 1)
dtm.fit(x, y)


# In[ ]:




from sklearn.metrics import mean_absolute_error as mae

pdp = dtm.predict(x)
mae(y, pdp)


# In[ ]:
 #per data result values
 meth = []
 mse_m = []
 rmse_m = []
 mae_m = []
 mdae_m = []
 evs_m = []
 r2_m = []
 #Parameter Values
 k = list(param['SVR Kernel'])[0]
 md = list(param['DTR Max Depth'])[0]
 deg = list(param['PR Degree'])[0]
 #Creating models
 mlr = lm.LinearRegression()
 svr = SVR(kernel=k, epsilon=0.1, C=1)
 dt = dtr(max_depth=md)
 poly = pf(degree=deg)
 pr = lm.LinearRegression()
 c = 0
 #Repeated K Fold Cross Validation
 for tr_i, ts_i in rkf.split(data):
     print(i, c)
     train, test = data.iloc[tr_i], data.iloc[ts_i]
     train_x = train.drop(columns=['Index', 'District', 'Rainfall'])
     train_y = train['Rainfall']
     test_x = test.drop(columns=['Index', 'District', 'Rainfall'])
     test_y = test['Rainfall']
     poly_tr = poly.fit_transform(train_x)
     poly_ts = poly.fit_transform(test_x)
     #Fitting the data in the model
     mlr.fit(train_x, train_y)
                          scoring='neg_mean_absolute_error')

grid_result = gridsearch.fit(X_train, y_train)
grid_pred = gridsearch.predict(X_test)
grid_rmse = mean_squared_error(y_test, grid_pred)
print('Ridge MAE: ' + str(sum(abs(grid_pred - y_test))/(len(y_test))))
print('Ridge RMSE: ' + str(np.sqrt(grid_rmse)))
print(grid_result.best_params_)
print(abs(grid_result.best_score_))


#-------------------------DECISION TREE GRIDSEARCH----------------------------------------
""" the best values for each parameter came out as: 'max_depth'=12,
'min_samples_leaf'=1, and 'min_samples_split'=2 using Dataset 3 """

gridsearch = GridSearchCV(estimator=dtr(random_state=4), cv=5,
                          param_grid={
                              'max_depth':[10,20,30,40,50],
                              'min_samples_split':[2,3,4,5],
                              'min_samples_leaf':[1,2,3,4,5]
                              },
                          scoring='neg_mean_absolute_error')

grid_result = gridsearch.fit(X_train, y_train)
grid_pred = gridsearch.predict(X_test)
grid_rmse = mean_squared_error(y_test, grid_pred)
print('Decision Tree MAE: ' + str(sum(abs(grid_pred - y_test))/(len(y_test))))
print('Decision Tree RMSE: ' + str(np.sqrt(grid_rmse)))
print(grid_result.best_params_)
print(abs(grid_result.best_score_))
Example #17

#Split your data into training and test sets
train_X, test_X, train_y, test_y = tts(X, y, train_size = 0.33, test_size = 0.33, random_state = 42)

print(len(train_X))
print(len(train_y))
print(len(test_X))
print(len(test_y))


# In[ ]:


#Fit a regressor on the split data and check its score
model = dtr()
model.fit(train_X, train_y)

a = model.score(test_X, test_y)
print('Score with model', a)
z = cs(model, test_X, test_y)

print('Cross-validation scores:', z)


# In[ ]:


#Predict your data
prediction = model.predict(test_X)
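The aliases tts, dtr and cs are not imported in this example; they presumably stand for the following scikit-learn names (an assumption based on how they are called):

# Assumed imports for this example (not shown in the original snippet)
from sklearn.model_selection import train_test_split as tts, cross_val_score as cs
from sklearn.tree import DecisionTreeRegressor as dtr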
Example #18
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder = LabelEncoder()
features[:, 1] = labelencoder.fit_transform(features[:, 1])
features[:, 3] = labelencoder.fit_transform(features[:, 3])
features[:, 4] = labelencoder.fit_transform(features[:, 4])
features[:, 5] = labelencoder.fit_transform(features[:, 5])

onehotencoder = OneHotEncoder(categorical_features=[1, 3, 4, 5])
features = onehotencoder.fit_transform(features).toarray()

labels[:, 0] = labelencoder.fit_transform(labels[:, 0])
onehotencoder = OneHotEncoder(categorical_features=[0])
labels = onehotencoder.fit_transform(labels).toarray()

from sklearn.tree import DecisionTreeRegressor as dtr
prog = dtr(random_state=0)
prog.fit(features, labels)

a = np.array([0, 1, 1, 0, 0, 0, 1, 0, 1, 10, 4]).reshape(1, -1)

pred = prog.predict(a)

features_grid = np.arange(min(features), max(features), 0.01)
features_grid = features_grid.reshape((len(features_grid)), 1)
plt.scatter(features, labels, color='red')
plt.plot(features_grid, prog.predict(features_grid), color='blue')
plt.title('Hire or Not Hire(Decision type Regression)')
plt.xlabel('Year of experience')
plt.ylabel('Hire')
plt.show()
Example #19
 rmse_d = []
 mae_d = []
 mdae_d = []
 evs_d = []
 r2_d = []
 c = 0
 #Repeated K Fold Cross Validation
 for tr_i, ts_i in rkf.split(data):
     train, test = data.iloc[tr_i], data.iloc[ts_i]
     train_x = train.drop(columns=['District', 'Index', 'Rainfall'])
     train_y = train['Rainfall']
     test_x = test.drop(columns=['District', 'Index', 'Rainfall'])
     test_y = test['Rainfall']
     for j in dep:
         print(i, c, j)
         dt = dtr(max_depth=j)
         dt.fit(train_x, train_y)
         dt_p = dt.predict(test_x)
         #Error values
         d.append(j)
         mse_d.append(mse(test_y, dt_p))
         rmse_d.append(rmse(test_y, dt_p))
         mae_d.append(mae(test_y, dt_p))
         mdae_d.append(mdae(test_y, dt_p))
         evs_d.append(evs(test_y, dt_p))
         r2_d.append(r2(test_y, dt_p))
     c += 1
 t = {}
 t['Depth'] = d
 t['MSE'] = mse_d
 t['RMSE'] = rmse_d