"""Boosting experiments on the wine dataset: a baseline run, then 10-fold
cross-validation over the AdaBoost learning rate."""
from sklearn.tree import DecisionTreeClassifier
import utils
import numpy as np
import matplotlib.pyplot as plot

# `data` keeps the 4-tuple so it can be splatted into utils helpers,
# while the individual splits stay addressable by name.
data = x_train, x_test, y_train, y_test = utils.import_wine(n_samples=5000,
                                                            y_transform=None)

# WITHOUT TUNING
base_clf = DecisionTreeClassifier(random_state=0,
                                  criterion='entropy',
                                  max_leaf_nodes=20000,
                                  max_depth=50)
utils.boosting(*data, base_clf, verbose=True)

# 10-FOLD CROSSVALIDATION ON THE LEARNING RATE
scores = []
learning_rates = []
for lr in np.linspace(1.3, 1.45, 5):
    print(lr)
    learning_rates.append(lr)
    # boosting_crossval returns several values; index 1 is the score kept.
    scores.append(utils.boosting_crossval(*data, base_clf, learning_rate=lr)[1])

plot.style.use('seaborn-darkgrid')
plot.title('Influence of the learning rate on boosting')
# NOTE(review): this chunk starts mid-script — `k`, `data`, the train/test
# splits and the imports (utils, KNeighborsClassifier, matplotlib) are
# defined above the visible region; verify against the full file.
utils.knn(*data, n_neighbors=k)

# INFLUENCE OF THE WEIGHTS
utils.knn(*data, n_neighbors=20, weights='distance')

# INFLUENCE OF THE METRICS
metrics = ['manhattan', 'chebyshev']
for m in metrics:
    utils.knn(*data, n_neighbors=20, metric=m)

# BEST MODEL — retrain on the full wine dataset (no n_samples cap).
data = x_train, x_test, y_train, y_test = utils.import_wine(y_transform=None)
utils.knn(*data, n_neighbors=20)

# LEARNING CURVE — score train/test at increasing training-set sizes.
clf = KNeighborsClassifier(n_neighbors=20)
x = []
train = []
test = []
for thousands in [0.02, 0.1, 1, 2, 3, 5, 10, 15, 25, 32]:
    index = int(1000 * thousands)
    x.append(index)
    clf.fit(x_train[:index], y_train[:index].values.ravel())
    train.append(clf.score(x_train[:index], y_train[:index].values.ravel()))
    test.append(clf.score(x_test, y_test.values.ravel()))
# NOTE(review): chunk starts mid-loop — `x`, `count`, `color` and `cataccs`
# (used here as plottable bar heights) come from code above the visible
# region. `cataccs` is also *called* as a function further down; the two
# usages conflict, which suggests two files were concatenated — confirm.
x.append(count)
count += 1
plot.bar(x, cataccs, color=color, width=0.75)

# Tick positions: 1.25, 2.75, ... one centred under each group of bars.
x = [1.25 + 1.5 * step for step in range(5)]
plot.xticks(x, ['None', 'PCA', 'ICA', 'RP', 'VAE'])
plot.xlabel('Feature transformation method')
plot.ylabel('Categorical accuracy (%)')
plot.show()

x_adult, y_adult, x_adult_test, y_adult_test = utils.import_adult()
x_wine, y_wine, x_wine_test, y_wine_test = utils.import_wine()

# K-MEANS (trailing comments record observed wall-clock times)
silhouette('adult', range(2, 15), x_adult)  # 1025s
cluster_breakdown('adult_kmeans', x_adult,
                  KMeans(n_clusters=5, random_state=0).fit_predict(x_adult))  # 10s
cataccs('adult', range(2, 15), 2, x_adult, y_adult)  # 91s
silhouette('wine', range(2, 15), x_wine)  # 2154s
cluster_breakdown('wine_kmeans', x_wine,
                  KMeans(n_clusters=3, random_state=0).fit_predict(x_wine))  # 24s
cataccs('wine reviews', range(2, 15), 5, x_wine, y_wine)  # 577s
"""Train a small dense network on the wine dataset and visualise the
predicted class distribution against the ground truth for five samples."""
from keras import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plot
import utils

data = x_train, x_test, y_train, y_test = utils.import_wine(
    y_transform='to_categorical')

# Two hidden ReLU layers; output width matches the one-hot label length.
model = Sequential()
model.add(Dense(10, input_dim=len(x_train.keys()), activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(len(y_train[0]), activation='softmax'))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])

history = model.fit(x_train, y_train,
                    validation_data=(x_test, y_test),
                    epochs=20, batch_size=50)
utils.plot_ann_history(history)
plot.show()

# Ground truth vs prediction, one overlaid bar chart per test sample.
y_predict = model.predict(x_test[:5])
for i in range(5):
    plot.bar([80, 84, 88, 92, 96, 100], [*y_test[i], 0],
             width=4, align='edge')
    # FIX: the source was truncated mid-call here (it ended on a comma);
    # the keyword arguments below mirror the y_test bar above.
    # NOTE(review): confirm the intended width/offset against the original —
    # a narrower width (e.g. 2) may have been used so both bars stay visible.
    plot.bar([80, 84, 88, 92, 96, 100], [*y_predict[i], 0],
             width=4, align='edge')
"""Decision-tree pruning experiments on the wine dataset."""
import utils

data = x_train, x_test, y_train, y_test = utils.import_wine()

# WITHOUT PRUNING
utils.dt_pruning(*data)

# PRUNING MANUALLY — same four (max_leaf_nodes, max_depth) trials as before,
# issued in the same order, just driven from a table.
for n_leaves, depth in ((1000, 40), (10000, 70), (40000, 90), (45000, 95)):
    utils.dt_pruning(*data, n_leaves, depth)

# PRUNING WITH 10-FOLD CROSS-VALIDATION
clf = utils.dt_crossval(*data,
                        n_leaf_range=range(41250, 41751, 100),
                        n_depth_range=range(100, 101))