Ejemplo n.º 1
0
# Scale the feature matrix and evaluate regressors on a train/test split.
# `scaler`, `X`, `y`, `pd` and `plt` are defined outside this excerpt.
# NOTE(review): the scaler is fitted on the full data set before the split,
# so test-set statistics leak into the scaling step -- confirm this is
# acceptable, or fit the scaler on the training split only.
scaled_X = scaler.fit_transform(X)

# Rebuild a DataFrame so the scaled features keep their column names.
new_X = pd.DataFrame(scaled_X, columns=X.columns)
# `head` must be called; the bare attribute reference was a no-op.
print(new_X.head())

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(new_X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=42)

# R^2 score on the training data.
from sklearn.tree import ExtraTreeRegressor
model = ExtraTreeRegressor()
model.fit(X_train, y_train)
print(model.score(X_train, y_train))

# R^2 score on the held-out test data. The model fitted on the training
# split is reused: refitting on X_test and scoring on X_test would only
# report another training score and says nothing about generalization.
print(model.score(X_test, y_test))

# Feature importances of the tree fitted on the training split; plot the
# five largest as a horizontal bar chart.
print(model.feature_importances_)
imp_feat = pd.Series(model.feature_importances_, index=X.columns)
imp_feat.nlargest(5).plot(kind='barh')
plt.show()

# Fit an ordinary least-squares baseline on the same training split.
from sklearn.linear_model import LinearRegression
lm = LinearRegression()
lm.fit(X_train, y_train)
Ejemplo n.º 2
0
from math import *
import pandas as pd
import numpy as np
from sklearn.tree import ExtraTreeRegressor
import matplotlib.pyplot as plt
import re,os
# Fit an extra-tree regressor that predicts the 'ice' column from the
# 'temp' and 'street' columns of ice.csv, then plot actual vs. predicted.
data = pd.read_csv('ice.csv')
x = data[['temp', 'street']]
y = data['ice']
clf = ExtraTreeRegressor()
clf.fit(x, y)
p = clf.predict(x)
# Parenthesised print works on both Python 2 and Python 3; the bare
# print statement is a SyntaxError on Python 3.
# NOTE: this score is computed on the same rows the model was fitted on.
print(clf.score(x, y))
# One x-position per row. The original hard-coded 31 points, which only
# works when the CSV has exactly 31 rows.
t = np.arange(len(data), dtype=float)
# Dashed line: observed values; solid line: model predictions.
plt.plot(t, data['ice'], '--', t, p, '-')
plt.show()
Ejemplo n.º 3
0
# Compare several regressors on the same train/test split by their score on
# the held-out test data. `forest_reg`, the estimator classes and the
# X_train / X_test / y_train / y_test splits are defined outside this
# excerpt. Each estimator is fitted exactly once: the original fitted every
# model twice in a row, which discards the first fit and doubles training
# time.
res5 = forest_reg.score(X_test, y_test)
print('forest_reg: ', res5)

grad_reg = GradientBoostingRegressor(n_estimators=500)
grad_reg.fit(X_train, y_train)
res6 = grad_reg.score(X_test, y_test)
print('grad_reg: ', res6)

ada_reg = AdaBoostRegressor(n_estimators=200)
ada_reg.fit(X_train, y_train)
res7 = ada_reg.score(X_test, y_test)
print('ada_reg: ', res7)

# Deliberately small tree: at most 5 leaves, at least 3 samples per leaf.
decision_reg = DecisionTreeRegressor(random_state=333,
                                     min_samples_leaf=3,
                                     max_leaf_nodes=5)
decision_reg.fit(X_train, y_train)
res8 = decision_reg.score(X_test, y_test)
print('decision_reg: ', res8)

# Same size constraints as the decision tree above, for a like-for-like
# comparison.
extraTree_reg = ExtraTreeRegressor(random_state=333,
                                   min_samples_leaf=3,
                                   max_leaf_nodes=5)
extraTree_reg.fit(X_train, y_train)
res9 = extraTree_reg.score(X_test, y_test)
print('extraTree_reg: ', res9)
Ejemplo n.º 4
0
# In[ ]:

from sklearn.tree import ExtraTreeRegressor

# Create an extra-tree regressor with default hyper-parameters.
dtr = ExtraTreeRegressor()

# In[ ]:

# Fit model
# NOTE(review): X_train / y_train are defined outside this excerpt.
dtr.fit(X_train, y_train)

# In[ ]:

# Score the fitted model on the training data (R^2 for scikit-learn
# regressors). This is a training-set score, so it does not measure how
# well the model generalizes to unseen data.
dtr.score(X_train, y_train)

# -----------------
# <a id="8"></a> <br>
# ## 8- Conclusion
# This kernel is not complete yet. I will try to cover all the parts of the ML process with a variety of Python packages. I know there are still some problems, so I hope to get your feedback to improve it.

# You can follow me on:
# <br>
# > ###### [ GitHub](https://github.com/mjbahmani)
# <br>
# --------------------------------------
#
#  **I hope you find this kernel helpful and some <font color="red"><b>UPVOTES</b></font> would be very much appreciated**

# <a id="9"></a> <br>
Ejemplo n.º 5
0
from math import *
import pandas as pd
import numpy as np
from sklearn.tree import ExtraTreeRegressor
import matplotlib.pyplot as plt
import re, os
# Fit an extra-tree regressor that predicts the 'ice' column from the
# 'temp' and 'street' columns of ice.csv, then plot actual vs. predicted.
data = pd.read_csv('ice.csv')
x = data[['temp', 'street']]
y = data['ice']
clf = ExtraTreeRegressor()
clf.fit(x, y)
p = clf.predict(x)
# Parenthesised print works on both Python 2 and Python 3; the bare
# print statement is a SyntaxError on Python 3.
# NOTE: this score is computed on the same rows the model was fitted on.
print(clf.score(x, y))
# One x-position per row. The original hard-coded 31 points, which only
# works when the CSV has exactly 31 rows.
t = np.arange(len(data), dtype=float)
# Dashed line: observed values; solid line: model predictions.
plt.plot(t, data['ice'], '--', t, p, '-')
plt.show()
#print("Best score on train set:{:.2f}".format(svr.best_score_))
#y_pred = svr.predict(X_test)
# Grid-search an ExtraTreeRegressor, evaluate it on the held-out test data,
# plot measured vs. predicted values and pickle the fitted search object.
# `GridSearchCV`, `metrics`, `pickle` and the X_train / X_test / y_train /
# y_test splits are defined outside this excerpt.
# NOTE(review): the name `gbm` looks like a leftover from an earlier
# LightGBM experiment; it is kept so that code outside this excerpt that
# refers to it keeps working.
gbm = ExtraTreeRegressor()

# 6-fold cross-validated search over leaf size, split size and tree depth.
gbm = GridSearchCV(
    gbm,
    param_grid={
        "min_samples_leaf": [1, 4, 8, 16, 32],
        "min_samples_split": [4, 10, 20, 100],
        "max_depth": [2, 8, 16, 32],
    },
    cv=6,
)

gbm.fit(X_train, y_train)
y_pred = gbm.predict(X_test)
# eval
print("MSE:", metrics.mean_squared_error(y_test, y_pred))
print("Test set score:{:.2f}".format(gbm.score(X_test, y_test)))
#print("AUC Score (Train): %f" % metrics.roc_auc_score(y_test, y_pred))

fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
# Dashed identity line (predicted == measured) across the measured range.
# The original used the predicted min/max for the y-coordinates, so the
# line did not represent perfect prediction.
measured_range = [y_test.min(), y_test.max()]
ax.plot(measured_range,
        measured_range,
        'k--',
        lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

# Persist the fitted search object (including the refit best estimator).
with open('model.pickle', 'wb') as fw:
    pickle.dump(gbm, fw)
#with open('model.pickle', 'rb') as fr: