import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt

from utilities import visualize_classifier

# Define sample input data: six 2-D points with their class labels
X = np.array([[3.1, 7.2], [4, 6.7], [2.9, 8], [5.1, 4.5], [6, 5], [5.6, 5]])
y = np.array([0, 0, 1, 1, 2, 3])

# Create the logistic regression classifier
classifier = linear_model.LogisticRegression(solver='liblinear', C=1)

# Train the classifier
classifier.fit(X, y)

# Visualize the performance of the classifier
visualize_classifier(classifier, X, y)
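# visualize_classifier is imported from a local utilities module that is not
# shown in these listings. Below is a minimal sketch of what such a helper
# could look like: it evaluates the classifier on a dense mesh grid and
# shades the decision regions. The implementation details (step size,
# colormaps) are assumptions, not the book's exact code.
import numpy as np
import matplotlib.pyplot as plt

def visualize_classifier(classifier, X, y, title=''):
    # Define the plotting range with a small margin around the data
    min_x, max_x = X[:, 0].min() - 1.0, X[:, 0].max() + 1.0
    min_y, max_y = X[:, 1].min() - 1.0, X[:, 1].max() + 1.0

    # Evaluate the classifier on every point of a dense mesh grid
    mesh_step = 0.01
    x_vals, y_vals = np.meshgrid(np.arange(min_x, max_x, mesh_step),
                                 np.arange(min_y, max_y, mesh_step))
    output = classifier.predict(np.c_[x_vals.ravel(), y_vals.ravel()])
    output = output.reshape(x_vals.shape)

    # Shade the decision regions and overlay the input points
    plt.figure()
    plt.title(title)
    plt.pcolormesh(x_vals, y_vals, output, cmap=plt.cm.gray)
    plt.scatter(X[:, 0], X[:, 1], c=y, s=75, edgecolors='black',
                linewidth=1, cmap=plt.cm.Paired)
    plt.xlim(x_vals.min(), x_vals.max())
    plt.ylim(y_vals.min(), y_vals.max())
    plt.show()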
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

from utilities import visualize_classifier

# Split data into training and testing datasets (75% train / 25% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=5)

# Ensemble Learning classifier: classifier_type is expected to come from
# the command line ('rf' or 'erf'); see the argparse sketch below
params = {'n_estimators': 100, 'max_depth': 4, 'random_state': 0}
if classifier_type == 'rf':
    classifier = RandomForestClassifier(**params)
else:
    classifier = ExtraTreesClassifier(**params)

# Construct and train the classifier chosen by the if/else statement
classifier.fit(X_train, y_train)
visualize_classifier(classifier, X_train, y_train)

y_test_pred = classifier.predict(X_test)
visualize_classifier(classifier, X_test, y_test)

# Evaluate the classifier performance
class_names = ['Class-0', 'Class-1', 'Class-2']
print("\n" + "#" * 40)
print("\nClassifier performance on training dataset\n")
print(classification_report(y_train, classifier.predict(X_train),
                            target_names=class_names))
print("#" * 40 + "\n")

print("#" * 40)
print("\nClassifier performance on test dataset\n")
print(classification_report(y_test, y_test_pred, target_names=class_names))
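# classifier_type used above is not defined in this listing. A minimal
# sketch of how it can be read from the command line; the --classifier-type
# flag matches the usage comments in the last listing of this section:
import argparse

def build_arg_parser():
    # Restrict the flag to the two supported values: 'rf' for Random Forest
    # and 'erf' for Extremely Random Forest
    parser = argparse.ArgumentParser(
        description='Classify data using Ensemble Learning techniques')
    parser.add_argument('--classifier-type', dest='classifier_type',
                        required=True, choices=['rf', 'erf'],
                        help="Type of classifier to use; can be 'rf' or 'erf'")
    return parser

# This would run at the top of the script, before the training code above
args = build_arg_parser().parse_args()
classifier_type = args.classifier_type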
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

from utilities import visualize_classifier

# Plot the input points of each class (class_0 and class_1 are assumed to
# be prepared beforehand; see the loading sketch below)
plt.figure()
plt.scatter(class_0[:, 0], class_0[:, 1], s=None, facecolors='black',
            edgecolors='blue', linewidth=1, marker='x')
plt.scatter(class_1[:, 0], class_1[:, 1], s=None, facecolors='white',
            edgecolors='red', linewidth=1, marker='o')
plt.title('Input data')

# Split data into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=5)

# Decision Trees classifier
params = {'random_state': 0, 'max_depth': 4}
classifier = DecisionTreeClassifier(**params)
classifier.fit(X_train, y_train)
visualize_classifier(classifier, X_train, y_train, 'Training dataset')

y_test_pred = classifier.predict(X_test)
visualize_classifier(classifier, X_test, y_test, 'Test dataset')

# Evaluate classifier performance
class_names = ['Class-0', 'Class-1']
print("\n" + "#" * 40)
print("\nClassifier performance on training dataset\n")
print(classification_report(y_train, classifier.predict(X_train),
                            target_names=class_names))
print("#" * 40 + "\n")

print("#" * 40)
print("\nClassifier performance on test dataset\n")
print(classification_report(y_test, y_test_pred, target_names=class_names))
print("#" * 40 + "\n")
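# X, y, class_0 and class_1 are not defined in the listing above. A minimal
# sketch of the usual preparation step, assuming a comma-separated text file
# whose last column holds the class label (the file name
# data_decision_trees.txt is an assumption; substitute your own dataset):
import numpy as np

input_file = 'data_decision_trees.txt'   # assumed file name
data = np.loadtxt(input_file, delimiter=',')
X, y = data[:, :-1], data[:, -1]

# Separate the input points by class label for plotting
class_0 = X[y == 0]
class_1 = X[y == 1]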
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

from utilities import visualize_classifier

# Plot the input points of each class (class_1 and class_2 are assumed to
# be prepared beforehand, as in the two-class example above)
plt.scatter(class_1[:, 0], class_1[:, 1], s=75, facecolors='white',
            edgecolors='black', linewidth=1, marker='o')
plt.scatter(class_2[:, 0], class_2[:, 1], s=75, facecolors='white',
            edgecolors='black', linewidth=1, marker='^')
plt.title('Input data')

# Split data into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=5)

# Ensemble Learning classifier: classifier_type comes from the command
# line ('rf' or 'erf'), as in the argparse sketch above
params = {'n_estimators': 100, 'max_depth': 4, 'random_state': 0}
if classifier_type == 'rf':
    classifier = RandomForestClassifier(**params)
else:
    classifier = ExtraTreesClassifier(**params)

classifier.fit(X_train, y_train)
visualize_classifier(classifier, X_train, y_train, 'Training dataset')

y_test_pred = classifier.predict(X_test)
visualize_classifier(classifier, X_test, y_test, 'Test dataset')

# Evaluate classifier performance
class_names = ['Class-0', 'Class-1', 'Class-2']
print("\n" + "#" * 40)
print("\nClassifier performance on training dataset\n")
print(classification_report(y_train, classifier.predict(X_train),
                            target_names=class_names))
print("#" * 40 + "\n")

print("#" * 40)
print("\nClassifier performance on test dataset\n")
print(classification_report(y_test, y_test_pred, target_names=class_names))
print("#" * 40 + "\n")
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

from utilities import visualize_classifier

# Create the Naïve Bayes classifier
classifier = GaussianNB()

# Train the classifier (X and y are assumed to be loaded beforehand,
# as in the listing that follows)
classifier.fit(X, y)

# Predict values for the training data
y_pred = classifier.predict(X)

# Compute the accuracy as the percentage of correctly predicted samples
accuracy = 100.0 * (y == y_pred).sum() / y.shape[0]
print("Accuracy of Naïve Bayes classifier =", round(accuracy, 2), "%")

# Visualize the performance of the classifier
visualize_classifier(classifier, X, y)

# Split data into training and test data (train_test_split now lives in
# sklearn.model_selection; the old cross_validation module was removed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=3)
classifier_new = GaussianNB()
classifier_new.fit(X_train, y_train)
y_test_pred = classifier_new.predict(X_test)

# Compute the accuracy of the classifier on the held-out test data
accuracy = 100.0 * (y_test == y_test_pred).sum() / X_test.shape[0]
print("Accuracy of the new classifier =", round(accuracy, 2), "%")

# Visualize the performance of the classifier
visualize_classifier(classifier_new, X_test, y_test)

num_folds = 3
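# num_folds above feeds a cross-validation step that the listing stops short
# of. A minimal sketch of how it can be used with sklearn's cross_val_score
# to report accuracy, precision, recall and F1 (the weighted scoring names
# are standard sklearn scorers):
from sklearn.model_selection import cross_val_score

accuracy_values = cross_val_score(classifier, X, y,
                                  scoring='accuracy', cv=num_folds)
print("Accuracy: " + str(round(100 * accuracy_values.mean(), 2)) + "%")

precision_values = cross_val_score(classifier, X, y,
                                   scoring='precision_weighted', cv=num_folds)
print("Precision: " + str(round(100 * precision_values.mean(), 2)) + "%")

recall_values = cross_val_score(classifier, X, y,
                                scoring='recall_weighted', cv=num_folds)
print("Recall: " + str(round(100 * recall_values.mean(), 2)) + "%")

f1_values = cross_val_score(classifier, X, y,
                            scoring='f1_weighted', cv=num_folds)
print("F1: " + str(round(100 * f1_values.mean(), 2)) + "%")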
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split, cross_val_score

from utilities import visualize_classifier

# Load the input data; the last column holds the class label
input_file = 'data_multivar_nb.txt'
data = np.loadtxt(input_file, delimiter=',')
x, y = data[:, :-1], data[:, -1]

# Create and train the Naïve Bayes classifier
classifier = GaussianNB()
classifier.fit(x, y)
y_pred = classifier.predict(x)

# Accuracy of the classifier on the training data
accuracy = 100.0 * (y == y_pred).sum() / x.shape[0]
print("Accuracy of Naïve Bayes classifier =", round(accuracy, 2), '%')
visualize_classifier(classifier, x, y)

# Split data into training and test data
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=3)
classifier_new = GaussianNB()
classifier_new.fit(x_train, y_train)
y_test_pred = classifier_new.predict(x_test)

# Accuracy on the test data (divide by the number of test samples,
# not the size of the full dataset)
accuracy = 100.0 * (y_test == y_test_pred).sum() / x_test.shape[0]
print("Accuracy of the new classifier =", round(accuracy, 2), '%')
visualize_classifier(classifier_new, x_test, y_test)

# Score the classifier with 3-fold cross-validation
num_folds = 3
accuracy_values = cross_val_score(classifier, x, y,
                                  scoring='accuracy', cv=num_folds)
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

from utilities import visualize_classifier

# Split data into training and testing datasets
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=5)

# Ensemble Learning classifier: classifier_type comes from the command
# line ('rf' or 'erf'), as in the argparse sketch above
params = {'n_estimators': 100, 'max_depth': 4, 'random_state': 0}
if classifier_type == 'rf':
    classifier = RandomForestClassifier(**params)
else:
    classifier = ExtraTreesClassifier(**params)

classifier.fit(x_train, y_train)
visualize_classifier(classifier, x_train, y_train)

y_test_pred = classifier.predict(x_test)
visualize_classifier(classifier, x_test, y_test)

# Evaluate classifier performance
class_names = ['Class-0', 'Class-1', 'Class-2']
print('\n' + '#' * 40)
print('\nClassifier performance on training dataset\n')
print(classification_report(y_train, classifier.predict(x_train),
                            target_names=class_names))
print('#' * 40 + '\n')

print('#' * 40)
print('\nClassifier performance on test dataset\n')
print(classification_report(y_test, y_test_pred, target_names=class_names))
print('#' * 40 + '\n')
# Usage:
# python3 random_forests.py --classifier-type rf
# python3 random_forests.py --classifier-type erf

import numpy as np

# Compute the confidence of the classifier on unseen datapoints
test_datapoints = np.array([[5, 5], [3, 6], [6, 4], [7, 2], [4, 4], [5, 2]])
print('\nConfidence measure:')
for datapoint in test_datapoints:
    # predict_proba returns one probability per class; the predicted class
    # is the one with the highest probability
    probabilities = classifier.predict_proba([datapoint])[0]
    predicted_class = 'Class-' + str(np.argmax(probabilities))
    print('\nDatapoint :', datapoint)
    print('Predicted class:', predicted_class)

# Visualize the datapoints on top of the decision regions
visualize_classifier(classifier, test_datapoints,
                     [0] * len(test_datapoints), 'Test datapoints')

plt.show()
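# A small optional extension (not part of the original listing): the winning
# probability itself can be reported as a numeric confidence score, which is
# often more informative than the class label alone.
for datapoint in test_datapoints:
    probabilities = classifier.predict_proba([datapoint])[0]
    confidence = probabilities.max()
    print('Datapoint:', datapoint,
          '-> confidence = ' + str(round(100 * confidence, 2)) + '%')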