print(Y.shape)

# normalize X
scaler = Norm().fit(X)
rescaledX = scaler.transform(X)

# split into train and test sets using t_t_s
# because we combined the datasets to apply uniform
# one-hot and label encoding, we set the 'shuffle' parameter to False
# we also know that there should be 15060 rows in the test set
test_set_size = test_dataset_nomissing.shape[0]
print('\n test_set_size...')
print(test_set_size)
X_train, X_test, Y_train, Y_test = t_t_s(rescaledX, Y, test_size=test_set_size,
                                         random_state=seed, shuffle=False)

# instantiate the XGBC class using defaults
model = XGBC()

# fit the model to the training data
print('\n training the model...')
model.fit(X_train, Y_train)

# view the trained model
print('\n model...')
print(model)

# make predictions for test data
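This fragment picks up mid-script, so several names come from code not shown. A minimal sketch of the setup it appears to assume; the aliases Norm, t_t_s, and XGBC, and the seed value, are inferred from the calls above rather than confirmed by the source:

# assumed setup for the fragment above (inferred, not from the source)
from sklearn.preprocessing import Normalizer as Norm            # scaler = Norm().fit(X)
from sklearn.model_selection import train_test_split as t_t_s  # t_t_s(...)
from xgboost import XGBClassifier as XGBC                       # model = XGBC()

seed = 7  # hypothetical value; any fixed integer will do

Note that with shuffle=False the random_state argument has no effect on the split; passing it is harmless but redundant here.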
dataset = dataset.drop(['fil', 'status'], axis=1)

# separate into X and Y
X = dataset.iloc[:, :17]
Y = dataset.iloc[:, 17]
X = X.values
Y = Y.values
print(Y.shape)

# label-encode the target
encoder = LE()
encoder.fit(Y)
Y = encoder.transform(Y)

X_train, X_test, Y_train, Y_test = t_t_s(X, Y, test_size=validation_size,
                                         random_state=seed, shuffle=False)

# instantiate the XGBC class using defaults
model = XGBC()

# fit the model to the training data
print('\n training the model...')
model.fit(X_train, Y_train)

# view the trained model
print('\n model...')
print(model)

# make predictions for test data
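Like the first fragment, this one ends at the '# make predictions for test data' comment, and it assumes an LE alias (plus validation_size and seed) from earlier in its script. A sketch of that assumed import together with a minimal continuation for the prediction step; the accuracy printout is illustrative, not from the source:

# assumed import (inferred): LE is sklearn's LabelEncoder
from sklearn.preprocessing import LabelEncoder as LE

# hedged continuation: evaluate the trained XGBC model on the held-out split
from sklearn.metrics import accuracy_score

Y_pred = model.predict(X_test)
print('\n test accuracy...')
print(accuracy_score(Y_test, Y_pred))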
# -*- coding: utf-8 -*-
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split as t_t_s
from sklearn.naive_bayes import GaussianNB as GNB
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture as GM
from sklearn.metrics import accuracy_score as a_s

df = sns.load_dataset('iris')
x = df.drop('species', axis=1)
y = df['species']

xtr, xte, ytr, yte = t_t_s(x, y, test_size=0.25, random_state=0)
print(xtr.shape, yte.shape)  # (112, 4) (38,)

model = GNB()
model.fit(xtr, ytr)
ypred = model.predict(xte)
print("Classification accuracy: {0:.2%}".format(a_s(yte, ypred)))

# dimensionality reduction
pca = PCA(n_components=2)
new_x = pca.fit_transform(x)
xtr_new, xte_new, ytr_new, yte_new = t_t_s(new_x, y, test_size=0.25, random_state=0)
print(xtr_new.shape, yte_new.shape)  # (112, 2) (38,)

model1 = GNB()
model1.fit(xtr_new, ytr_new)
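The iris fragment stops right after fitting model1 on the two PCA components. A minimal continuation that mirrors the evaluation of the full four-feature model above (a sketch; the name ypred1 is made up):

# hedged continuation: score the classifier on the PCA-reduced test set
ypred1 = model1.predict(xte_new)
print("Classification accuracy (2 PCA components): {0:.2%}".format(a_s(yte_new, ypred1)))

The next fragment plots a two-dimensional embedding of the scikit-learn digits dataset, but it starts after the embedding is computed, so digits, RFC, and new are undefined here. One plausible setup, assuming the embedding comes from Isomap (an assumption; any 2-D projection of digits.data would fit the scatter plot that follows):

# assumed setup for the digits fragment below (inferred, not from the source)
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.manifold import Isomap

digits = load_digits()
new = Isomap(n_components=2).fit_transform(digits.data)  # hypothetical source of 'new'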
print(new.shape)

sns.set(style='whitegrid')
plt.figure()
plt.scatter(new[:, 0], new[:, 1], c=digits.target,
            cmap=plt.cm.get_cmap('Spectral', 10),
            edgecolor='none', alpha=0.6)
plt.colorbar(label='Digits', ticks=range(10), extend='both')
plt.clim(-0.5, 9.5)

# classification
model = RFC(n_estimators=400)
xtr, xte, ytr, yte = t_t_s(digits.data, digits.target, test_size=0.2, random_state=0)
model.fit(xtr, ytr)
ypred = model.predict(xte)

# plot a 10x10 grid of test-set digit images
fig, ax = plt.subplots(10, 10, figsize=(14, 10),
                       subplot_kw={'xticks': [], 'yticks': []},
                       gridspec_kw=dict(hspace=0.1, wspace=0.1))
for i, axi in enumerate(ax.flat):
    axi.imshow(xte.reshape(-1, 8, 8)[i], cmap='binary')
    axi.text(0.05, 0.05,