# Estimating house value with gradient-boosted regression trees.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split as TTS
from sklearn.ensemble import GradientBoostingRegressor as GBR
# Fix: the private module path sklearn.datasets.california_housing was
# removed (scikit-learn >= 0.24); the loader is public in sklearn.datasets.
from sklearn.datasets import fetch_california_housing

# Load (downloading on first use) the California housing data:
# 8 numeric features, target is median house value.
cal_hs = fetch_california_housing()

# split 80/20 train-test (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = TTS(
    cal_hs.data, cal_hs.target, test_size=0.2, random_state=1
)
names = cal_hs.feature_names

print("Training GBRT...")
# Huber loss makes the ensemble robust to outlier house prices.
clf = GBR(n_estimators=100, max_depth=4, learning_rate=0.1,
          loss='huber', random_state=1)
clf.fit(X_train, y_train)
print(" done.")

# R^2 score on the held-out 20% split.
accuracy = clf.score(X_test, y_test)
f_i = clf.feature_importances_
y_predict = clf.predict(X_test)
# NOTE(review): loop-body fragment — relies on `axes`, `i`, `min_x`, `max_x`,
# `min_y`, `max_y`, `x`, `y`, `font`, `colors`, TTS and DecisionTreeClassifier
# being defined earlier in the file.
# Hide tick marks on both subplot columns of row i.
axes[i][0].set_xticks(())
axes[i][0].set_yticks(())
axes[i][1].set_xticks(())
axes[i][1].set_yticks(())
# Set the column titles (only on the first row).
if (i == 0):
    axes[i][0].set_title(label='分类前', pad=10, fontdict=font)
    axes[i][1].set_title(label='分类后', pad=10, fontdict=font)
# Build a mesh of evaluation points at 0.1 spacing over the data range.
array1, array2 = np.meshgrid(np.arange(min_x, max_x, 0.1),
                             np.arange(min_y, max_y, 0.1))
# 70/30 train/test split of the data.
x_train, x_test, y_train, y_test = TTS(x, y, test_size=0.3)
# Scatter the training points, colored red/blue by class label.
axes[i][0].scatter(x=x_train[:, 0], y=x_train[:, 1], c=y_train, marker='o',
                   s=30, cmap=colors.ListedColormap(['red', 'blue']),
                   edgecolor='black')
# Fit an entropy-criterion decision tree (depth <= 4) on the training data.
clf = DecisionTreeClassifier(criterion='entropy', random_state=1,
                             splitter='best', max_depth=4).fit(x_train, y_train)
# Linear regression on the Boston housing data, evaluated with MSE and
# 10-fold cross-validation.
from sklearn.linear_model import LinearRegression as LR
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
# from sklearn.datasets import fetch_california_housing as fch
import pandas as pd
# Fix: load_boston was imported twice; one import suffices.
# NOTE(review): load_boston is removed in scikit-learn >= 1.2 — this
# script requires an older scikit-learn version.
from sklearn.datasets import load_boston
from sklearn.model_selection import KFold, cross_val_score as CVS, train_test_split as TTS

data = load_boston()
x = data.data
y = data.target
print("x", x.shape)
print("y", y.shape)

X = pd.DataFrame(x)
# 70/30 split with a fixed seed for reproducibility.
xtrain, xtest, ytrain, ytest = TTS(X, y, test_size=0.3, random_state=420)
# Re-index the split frames so row labels run 0..n-1 again.
for i in [xtrain, xtest]:
    i.index = range(i.shape[0])

reg = LR().fit(xtrain, ytrain)
yhat = reg.predict(xtest)
print("w", xtest.shape)
print("coef", reg.coef_)

from sklearn.metrics import mean_squared_error as MSE
# Fix: argument order per the API contract is (y_true, y_pred); MSE is
# symmetric so the value is unchanged.
mse = MSE(ytest, yhat)
print("mse", mse)  # Fix: output label typo "mes" -> "mse"
print("mean", ytest.mean())

# 10-fold CV; sklearn negates MSE so that larger scores are better.
s = cross_val_score(reg, X, y, cv=10, scoring="neg_mean_squared_error")
print("s", s)
# NOTE(review): fragment — the `names` list literal opens earlier in the
# file; pd, plt and sns are imported above this chunk.
    'Flavanoids', 'Nonflavanoid.phenols', 'Proanth', 'Color.int', 'Hue',
    'OD', 'Proline'
]
# header=0 replaces the file's header row with the explicit column names.
wine = pd.read_csv('wine.csv', names=names, header=0)
# Features are columns 1..12; the class label is column 0.
# NOTE(review): iloc[1:] also drops the first data row — confirm this is
# intentional and not a leftover header work-around.
X = wine.iloc[1:, 1:13]
y = wine.iloc[1:, 0]
# Visualize pairwise feature correlations as a heatmap.
corr = wine.corr()
print(corr)
plt.figure(figsize=(13, 13))
sns.heatmap(corr)
plt.show()
# splitting dataset into training data and testing data
from sklearn.model_selection import train_test_split as TTS
X_train, X_test, y_train, y_test = TTS(X, y, test_size=0.2, random_state=0)
# feature scaling using standardscaler
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# NOTE(review): StandardScaler ignores y; passing y_train here is harmless
# but misleading — fit_transform(X_train) alone is the intended call.
X_train = sc.fit_transform(X_train, y_train)
X_test = sc.transform(X_test)
# performing Linear Discriminant Analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=1)
# LDA is supervised: fit uses the class labels, transform does not.
X_train = lda.fit_transform(X_train, y_train)
X_test = lda.transform(X_test)
# using RandomForestClassifier to train the model and predict
from sklearn.ensemble import RandomForestClassifier as RDF
from sklearn.model_selection import KFold, ShuffleSplit, cross_val_score as CVS, train_test_split as TTS, GridSearchCV
from sklearn.metrics import mean_squared_error as MSE, r2_score
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from time import time
import datetime
import FeatureTools as ft
import pickle
# In[]:
# NOTE(review): load_boston and xgb are not imported in this chunk — they
# must come from elsewhere in the file. load_boston is removed in
# scikit-learn >= 1.2.
data = load_boston()
X = data.data
y = data.target
# 70/30 split with a fixed seed for reproducibility.
Xtrain,Xtest,Ytrain,Ytest = TTS(X,y,test_size=0.3,random_state=420)
# In[]:
# Wrap the training split in XGBoost's optimized DMatrix container.
dtrain = xgb.DMatrix(Xtrain,Ytrain)
# Set booster parameters for training the model.
# NOTE(review): this dict literal is not closed in this chunk — it
# continues past the visible source.
param = {'silent':True
,'obj':'reg:linear'
,"subsample":1
,"eta":0.05
,"gamma":20
,"lambda":3.5
,"alpha":0.2
,"max_depth":4
,"colsample_bytree":0.4
,"colsample_bylevel":0.6
def __one_split(self, *data, **options):
    """Perform a single train/test split of the given arrays.

    Positional arrays and keyword options are forwarded verbatim to
    scikit-learn's ``train_test_split``.

    :return: the list of split arrays produced by ``train_test_split``
    """
    split_result = TTS(*data, **options)
    return split_result
# NOTE(review): fragment — `corpus`, `text` and `dataset` are built earlier
# in the file; this first statement is presumably the tail of a
# text-cleaning loop.
corpus.append(text)
# Bag-of-words features capped at the 1500 most frequent terms.
from sklearn.feature_extraction.text import CountVectorizer as CV
cv = CV(max_features=1500)
features = cv.fit_transform(corpus).toarray()
labels = dataset.iloc[:, 0].values
# Encode the class labels as integers.
from sklearn.preprocessing import LabelEncoder as LE
le = LE()
labels = le.fit_transform(labels)
# 70/30 train/test split with a fixed seed.
from sklearn.model_selection import train_test_split as TTS
features_train, features_test, labels_train, labels_test = TTS(
    features, labels, test_size=0.3, random_state=0)
# Gaussian Naive Bayes classifier on the dense count features.
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(features_train, labels_train)
labels_pred = classifier.predict(features_test)
# Confusion matrix of true vs. predicted labels on the test split.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(labels_test, labels_pred)
from sklearn.datasets import load_wine
import pandas as pd
# import tensorflow.layers as layers
import tensorflow as tf
# NOTE(review): tf.contrib was removed in TensorFlow 2.x — this chunk
# requires TensorFlow 1.x.
from tensorflow.contrib.layers import fully_connected
# GET THE DATA
wine_data = load_wine()
feat_data = wine_data['data']
labels = wine_data['target']
# SPLIT
# NOTE(review): TTS (train_test_split) and MMS (presumably MinMaxScaler)
# are not imported in this chunk — confirm they are imported elsewhere in
# the file.
X_train, X_test, y_train, y_test = TTS(feat_data, labels, test_size=0.3,
                                       random_state=64)
# SCALE
scaler = MMS()
# Fit the scaler on the training split only, then apply it to both splits.
scaled_x_train = scaler.fit_transform(X_train)
scaled_x_test = scaler.transform(X_test)
# ONE HOT
# One-hot encode the integer class labels via pandas dummies.
onehot_y_train = pd.get_dummies(y_train).values
onehot_y_test = pd.get_dummies(y_test).values
# CONSTANTS
num_feat = X_train.shape[1]
num_neurons_in_hidden = 20
num_outputs = onehot_y_train.shape[1]