# Estimating house value
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split as TTS
from sklearn.ensemble import GradientBoostingRegressor as GBR
from sklearn.datasets import fetch_california_housing

cal_hs = fetch_california_housing()
# split 80/20 train-test
X_train, X_test, y_train, y_test = TTS(cal_hs.data,
                                       cal_hs.target,
                                       test_size=0.2,
                                       random_state=1)
names = cal_hs.feature_names
print("Training GBRT...")
clf = GBR(n_estimators=100,
          max_depth=4,
          learning_rate=0.1,
          loss='huber',
          random_state=1)
clf.fit(X_train, y_train)
print(" done.")
r2 = clf.score(X_test, y_test)  # score() returns R^2 for a regressor, not accuracy
f_i = clf.feature_importances_
y_predict = clf.predict(X_test)
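The snippet computes f_i and y_predict but never uses them. A minimal follow-up, assuming only the names, f_i, and r2 variables from above, could rank the features by importance:

# Hypothetical continuation: list features sorted by importance
order = np.argsort(f_i)[::-1]
for idx in order:
    print(f"{names[idx]}: {f_i[idx]:.3f}")
print("R^2 on test set:", r2)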
Example #2
    axes[i][0].set_xticks(())
    axes[i][0].set_yticks(())
    axes[i][1].set_xticks(())
    axes[i][1].set_yticks(())

    # Set the panel titles
    if i == 0:
        axes[i][0].set_title(label='Before classification', pad=10, fontdict=font)
        axes[i][1].set_title(label='After classification', pad=10, fontdict=font)

    # Generate the grid points
    array1, array2 = np.meshgrid(np.arange(min_x, max_x, 0.1),
                                 np.arange(min_y, max_y, 0.1))

    # Split the data with TTS
    x_train, x_test, y_train, y_test = TTS(x, y, test_size=0.3)

    # Plot the training points
    axes[i][0].scatter(x=x_train[:, 0],
                       y=x_train[:, 1],
                       c=y_train,
                       marker='o',
                       s=30,
                       cmap=colors.ListedColormap(['red', 'blue']),
                       edgecolor='black')

    # Fit the classifier on the training data
    clf = DecisionTreeClassifier(criterion='entropy',
                                 random_state=1,
                                 splitter='best',
                                 max_depth=4).fit(x_train, y_train)
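The fragment builds the meshgrid but is cut off before the decision surface is drawn. A typical continuation (a sketch; the colormap and grid follow the code above, the rest is assumed) would shade the regions on the right panel:

    # Hypothetical continuation: draw the decision regions and the test points
    grid = np.c_[array1.ravel(), array2.ravel()]
    z = clf.predict(grid).reshape(array1.shape)
    axes[i][1].contourf(array1, array2, z,
                        cmap=colors.ListedColormap(['red', 'blue']),
                        alpha=0.3)
    axes[i][1].scatter(x=x_test[:, 0], y=x_test[:, 1], c=y_test,
                       marker='o', s=30,
                       cmap=colors.ListedColormap(['red', 'blue']),
                       edgecolor='black')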
Example #3
from sklearn.linear_model import LinearRegression as LR
from sklearn.model_selection import KFold, cross_val_score, train_test_split as TTS
# from sklearn.datasets import fetch_california_housing as fch
import pandas as pd
from sklearn.datasets import load_boston  # removed in scikit-learn 1.2; needs scikit-learn <= 1.1
data = load_boston()
x = data.data
y = data.target
print("x", x.shape)
print("y", y.shape)

X = pd.DataFrame(x)
xtrain, xtest, ytrain, ytest = TTS(X, y, test_size=0.3, random_state=420)
for i in [xtrain, xtest]:
    i.index = range(i.shape[0])

reg = LR().fit(xtrain, ytrain)
yhat = reg.predict(xtest)
print("w", xtest.shape)
print("coef", reg.coef_)

from sklearn.metrics import mean_squared_error as MSE
mse = MSE(ytest, yhat)  # mean_squared_error(y_true, y_pred)
print("mse", mse)
print("mean", ytest.mean())

s = cross_val_score(reg, X, y, cv=10, scoring="neg_mean_squared_error")
print("s", s)
Example #4
    'Flavanoids', 'Nonflavanoid.phenols', 'Proanth', 'Color.int', 'Hue', 'OD',
    'Proline'
]

wine = pd.read_csv('wine.csv', names=names, header=0)

X = wine.iloc[:, 1:14]  # all 13 feature columns; iloc[1:, 1:13] would drop the first row and 'Proline'
y = wine.iloc[:, 0]
corr = wine.corr()
print(corr)

import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(13, 13))
sns.heatmap(corr)
plt.show()
#splitting dataset into training data and testing data
from sklearn.model_selection import train_test_split as TTS
X_train, X_test, y_train, y_test = TTS(X, y, test_size=0.2, random_state=0)

#feature scaling using standardscaler
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)  # StandardScaler ignores y, so don't pass y_train
X_test = sc.transform(X_test)

#performing Linear Discriminant Analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=1)
X_train = lda.fit_transform(X_train, y_train)
X_test = lda.transform(X_test)

#using RandomForestClassifier to train the model and predict
from sklearn.ensemble import RandomForestClassifier as RDF
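The example is cut off right after importing RandomForestClassifier. A minimal continuation (a sketch; n_estimators=100 and the accuracy report are assumptions, not part of the original) might be:

# Hypothetical continuation: fit on the LDA-projected features and evaluate
clf = RDF(n_estimators=100, random_state=0)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("test accuracy:", clf.score(X_test, y_test))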
Example #5
from sklearn.model_selection import KFold, ShuffleSplit, cross_val_score as CVS, train_test_split as TTS, GridSearchCV
from sklearn.metrics import mean_squared_error as MSE, r2_score
from sklearn.datasets import load_boston
import xgboost as xgb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from time import time
import datetime
import FeatureTools as ft
import pickle

# In[]:
data = load_boston()
X = data.data
y = data.target

Xtrain,Xtest,Ytrain,Ytest = TTS(X,y,test_size=0.3,random_state=420)

# In[]:
dtrain = xgb.DMatrix(Xtrain,Ytrain)

# Set the parameters and train the model
param = {'silent': True              # deprecated in xgboost >= 1.0; use 'verbosity' there
          ,'objective': 'reg:linear'  # the key is 'objective', not 'obj'; newer xgboost spells this 'reg:squarederror'
          ,"subsample": 1
          ,"eta": 0.05
          ,"gamma": 20
          ,"lambda": 3.5
          ,"alpha": 0.2
          ,"max_depth": 4
          ,"colsample_bytree": 0.4
          ,"colsample_bylevel": 0.6
          }
Example #6
    def __one_split(self, *data, **options):
        """
        Split data into train and test sets by forwarding to
        sklearn.model_selection.train_test_split (imported as TTS).
        :return: the list of train/test splits that TTS produces
        """
        return TTS(*data, **options)
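Out of context the method is just a thin wrapper. A minimal host class (entirely assumed, only to show how the name-mangled private method would be called) could look like:

from sklearn.model_selection import train_test_split as TTS

class Splitter:
    def split(self, X, y, test_size=0.2):
        # public entry point that delegates to the private wrapper
        return self.__one_split(X, y, test_size=test_size)

    def __one_split(self, *data, **options):
        return TTS(*data, **options)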
Example #7
    corpus.append(text)

from sklearn.feature_extraction.text import CountVectorizer as CV

cv = CV(max_features=1500)
features = cv.fit_transform(corpus).toarray()
labels = dataset.iloc[:, 0].values

from sklearn.preprocessing import LabelEncoder as LE

le = LE()
labels = le.fit_transform(labels)

from sklearn.model_selection import train_test_split as TTS

features_train, features_test, labels_train, labels_test = TTS(features,
                                                               labels,
                                                               test_size=0.3,
                                                               random_state=0)

from sklearn.naive_bayes import GaussianNB

classifier = GaussianNB()
classifier.fit(features_train, labels_train)

labels_pred = classifier.predict(features_test)

from sklearn.metrics import confusion_matrix

cm = confusion_matrix(labels_test, labels_pred)
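The confusion matrix is computed but never summarized. A short follow-up (a sketch using the names above) reads the overall accuracy off its diagonal:

import numpy as np

print(cm)
print("accuracy:", np.trace(cm) / cm.sum())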
Example #8
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split as TTS
from sklearn.preprocessing import MinMaxScaler as MMS

import pandas as pd

# import tensorflow.layers as layers
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected  # tf.contrib exists only in TensorFlow 1.x

# GET THE DATA
wine_data = load_wine()
feat_data = wine_data['data']
labels = wine_data['target']

# SPLIT
X_train, X_test, y_train, y_test = TTS(feat_data,
                                       labels,
                                       test_size=0.3,
                                       random_state=64)

# SCALE
scaler = MMS()
scaled_x_train = scaler.fit_transform(X_train)
scaled_x_test = scaler.transform(X_test)

# ONE HOT
onehot_y_train = pd.get_dummies(y_train).values
onehot_y_test = pd.get_dummies(y_test).values

# CONSTANTS
num_feat = X_train.shape[1]
num_neurons_in_hidden = 20
num_outputs = onehot_y_train.shape[1]
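The listing ends at the constants. A plausible continuation in the same TensorFlow 1.x style (a sketch; every layer size, learning rate, and step count below is an assumption) would wire up and train the graph like this:

# Hypothetical continuation: placeholders, two hidden layers, softmax loss
X = tf.placeholder(tf.float32, shape=[None, num_feat])
y_true = tf.placeholder(tf.float32, shape=[None, num_outputs])

hidden1 = fully_connected(X, num_neurons_in_hidden, activation_fn=tf.nn.relu)
hidden2 = fully_connected(hidden1, num_neurons_in_hidden, activation_fn=tf.nn.relu)
logits = fully_connected(hidden2, num_outputs, activation_fn=None)

loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true, logits=logits))
train_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        sess.run(train_op, feed_dict={X: scaled_x_train, y_true: onehot_y_train})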