import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt

from utilities import visualize_classifier

# define sample input data
X = np.array([
    [3.1, 7.2],
    [4, 6.7],
    [2.9, 8],
    [5.1, 4.5],
    [6, 5],
    [5.6, 5]
])
y = np.array(
    [0, 0, 1, 1, 2, 3]
)

# create logistic regression classifier
classifier = linear_model.LogisticRegression(solver='liblinear', C=1)

# train the classifier
classifier.fit(X, y)

# visualize the performance of the classifier
visualize_classifier(classifier, X, y)
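# `visualize_classifier` comes from a local `utilities` module that is
# not shown here. A minimal sketch of what such a helper might look
# like (the grid step, margins, and color maps are assumptions, not
# the original code): it predicts over a mesh grid and shades the
# decision regions behind the input points.
def visualize_classifier(classifier, X, y, title=''):
    # Define the plotting range with a small margin around the data
    min_x, max_x = X[:, 0].min() - 1.0, X[:, 0].max() + 1.0
    min_y, max_y = X[:, 1].min() - 1.0, X[:, 1].max() + 1.0

    # Evaluate the classifier on a dense mesh grid
    mesh_step = 0.01
    x_vals, y_vals = np.meshgrid(np.arange(min_x, max_x, mesh_step),
                                 np.arange(min_y, max_y, mesh_step))
    output = classifier.predict(np.c_[x_vals.ravel(), y_vals.ravel()])
    output = output.reshape(x_vals.shape)

    # Plot the decision regions and overlay the input points
    plt.figure()
    plt.title(title)
    plt.pcolormesh(x_vals, y_vals, output, cmap=plt.cm.gray)
    plt.scatter(X[:, 0], X[:, 1], c=y, s=75, edgecolors='black',
                linewidth=1, cmap=plt.cm.Paired)
    plt.xlim(x_vals.min(), x_vals.max())
    plt.ylim(y_vals.min(), y_vals.max())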
# ----- Example #2 -----
    # Fragment of a larger function that receives classifier_type
    # ('rf' or 'erf') as an argument; the sklearn imports below are
    # needed for the fragment to run.
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
    from sklearn.metrics import classification_report

    # Split data into training and testing datasets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=5)

    # Construct the ensemble learning classifier based on the requested
    # type: random forest ('rf') or extremely random forests ('erf')
    params = {'n_estimators': 100, 'max_depth': 4, 'random_state': 0}
    if classifier_type == 'rf':
        classifier = RandomForestClassifier(**params)
    else:
        classifier = ExtraTreesClassifier(**params)

    # Train the classifier and visualize it on the training data
    classifier.fit(X_train, y_train)
    visualize_classifier(classifier, X_train, y_train)
    y_test_pred = classifier.predict(X_test)
    visualize_classifier(classifier, X_test, y_test)

    # evaluating the classifier performance
    class_names = ['Class-0', 'Class-1', 'Class-2']
    print("\n" + "#" * 40)
    print("\nClassifier performance on training dataset\n")
    print(
        classification_report(y_train,
                              classifier.predict(X_train),
                              target_names=class_names))
    print("#" * 40 + "\n")
    print("#" * 40)
    print("\nClassifier performance on test dataset\n")
    print(classification_report(y_test, y_test_pred, target_names=class_names))
# ----- Example #3 -----
# This example assumes X and y (a two-feature, two-class dataset) were
# loaded earlier and split by label into class_0 and class_1; it also
# needs these imports:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Visualize the input data
plt.figure()
plt.scatter(class_0[:, 0], class_0[:, 1], s=75, facecolors='black',
            edgecolors='blue', linewidth=1, marker='x')
plt.scatter(class_1[:, 0], class_1[:, 1], s=75, facecolors='white',
            edgecolors='red', linewidth=1, marker='o')
plt.title('Input data')

# Split data into training and testing datasets 
X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=5)

# Decision Trees classifier 
params = {'random_state': 0, 'max_depth': 4}
classifier = DecisionTreeClassifier(**params)
classifier.fit(X_train, y_train)
visualize_classifier(classifier, X_train, y_train, 'Training dataset')

y_test_pred = classifier.predict(X_test)
visualize_classifier(classifier, X_test, y_test, 'Test dataset')

# Evaluate classifier performance
class_names = ['Class-0', 'Class-1']
print("\n" + "#"*40)
print("\nClassifier performance on training dataset\n")
print(classification_report(y_train, classifier.predict(X_train), target_names=class_names))
print("#"*40 + "\n")

print("#"*40)
print("\nClassifier performance on test dataset\n")
print(classification_report(y_test, y_test_pred, target_names=class_names))
print("#"*40 + "\n")
# ----- Example #4 -----
    # Fragment of a function taking classifier_type ('rf' or 'erf');
    # it assumes a three-class dataset split by label into class_0,
    # class_1 and class_2, plus the sklearn imports from Example #2.
    plt.scatter(class_1[:, 0], class_1[:, 1], s=75, facecolors='white',
                edgecolors='black', linewidth=1, marker='o')
    plt.scatter(class_2[:, 0], class_2[:, 1], s=75, facecolors='white',
                edgecolors='black', linewidth=1, marker='^')
    plt.title('Input data')

    # Split data into training and testing datasets 
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=5)

    # Ensemble Learning classifier
    params = {'n_estimators': 100, 'max_depth': 4, 'random_state': 0}
    if classifier_type == 'rf':
        classifier = RandomForestClassifier(**params)
    else:
        classifier = ExtraTreesClassifier(**params)

    classifier.fit(X_train, y_train)
    visualize_classifier(classifier, X_train, y_train, 'Training dataset')

    y_test_pred = classifier.predict(X_test)
    visualize_classifier(classifier, X_test, y_test, 'Test dataset')

    # Evaluate classifier performance
    class_names = ['Class-0', 'Class-1', 'Class-2']
    print("\n" + "#"*40)
    print("\nClassifier performance on training dataset\n")
    print(classification_report(y_train, classifier.predict(X_train), target_names=class_names))
    print("#"*40 + "\n")

    print("#"*40)
    print("\nClassifier performance on test dataset\n")
    print(classification_report(y_test, y_test_pred, target_names=class_names))
    print("#"*40 + "\n")
# ----- Example #5 -----
# Create the Naïve Bayes classifier (X and y are assumed to be loaded
# already; GaussianNB needs to be imported)
from sklearn.naive_bayes import GaussianNB

classifier = GaussianNB()

# Train the classifier
classifier.fit(X, y)

# Predict values from training data
y_pred = classifier.predict(X)

# Compute accuracy
accuracy = 100.0 * (y == y_pred).sum() / y.shape[0]
print("Accuracy of Naïve Bayes classifier = ", round(accuracy, 2), "%")

# Visualize the performance of the classifier
visualize_classifier(classifier, X, y)

# Split data into training and test data (sklearn's old
# cross_validation module was removed; train_test_split now lives in
# sklearn.model_selection)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=3)
classifier_new = GaussianNB()
classifier_new.fit(X_train, y_train)
y_test_pred = classifier_new.predict(X_test)

# compute accuracy of the classifier
accuracy = 100.0 * (y_test == y_test_pred).sum() / X_test.shape[0]
print("Accuracy of the new classifier =", round(accuracy, 2), "%")
# Visualize the performance of the classifier
visualize_classifier(classifier_new, X_test, y_test)

# Cross-validate the classifier's accuracy
from sklearn.model_selection import cross_val_score

num_folds = 3
accuracy_values = cross_val_score(classifier, X, y,
                                  scoring='accuracy', cv=num_folds)
print("Accuracy: " + str(round(100 * accuracy_values.mean(), 2)) + "%")

# ----- Example #6 -----
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split, cross_val_score
from utilities import visualize_classifier

input_file = 'data_multivar_nb.txt'
data = np.loadtxt(input_file, delimiter=',')
x, y = data[:, :-1], data[:, -1]
classifier = GaussianNB()
classifier.fit(x, y)
y_pred = classifier.predict(x)

# Compute the accuracy of the classifier
accuracy = 100.0 * (y == y_pred).sum() / x.shape[0]
print("Accuracy of the naive Bayes classifier =", round(accuracy, 2), '%')
visualize_classifier(classifier, x, y)

x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=3)
classifier_new = GaussianNB()
classifier_new.fit(x_train, y_train)
y_test_pred = classifier_new.predict(x_test)
# Compute accuracy on the held-out test set (normalized by the number
# of test samples)
accuracy = 100.0 * (y_test == y_test_pred).sum() / x_test.shape[0]
print("Accuracy of the new classifier =", round(accuracy, 2), '%')
visualize_classifier(classifier_new, x_test, y_test)

# Cross-validate the classifier's accuracy over num_folds splits
num_folds = 3
accuracy_values = cross_val_score(classifier, x, y,
                                  scoring='accuracy', cv=num_folds)
print("Accuracy: " + str(round(100 * accuracy_values.mean(), 2)) + "%")
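# An extension beyond the original: cross_val_score accepts other
# scoring strings, so weighted precision, recall, and F1 follow the
# same pattern as accuracy above.
precision_values = cross_val_score(classifier, x, y,
                                   scoring='precision_weighted',
                                   cv=num_folds)
print("Precision: " + str(round(100 * precision_values.mean(), 2)) + "%")

recall_values = cross_val_score(classifier, x, y,
                                scoring='recall_weighted', cv=num_folds)
print("Recall: " + str(round(100 * recall_values.mean(), 2)) + "%")

f1_values = cross_val_score(classifier, x, y,
                            scoring='f1_weighted', cv=num_folds)
print("F1: " + str(round(100 * f1_values.mean(), 2)) + "%")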
# ----- Example #7 -----
    # Fragment of a function taking classifier_type ('rf' or 'erf'),
    # with the same sklearn imports as Example #2
    # Split data into training and testing datasets
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.25,
                                                        random_state=5)

    # Ensemble Learning classifier
    params = {'n_estimators': 100, 'max_depth': 4, 'random_state': 0}

    if classifier_type == 'rf':
        classifier = RandomForestClassifier(**params)
    else:
        classifier = ExtraTreesClassifier(**params)

    classifier.fit(x_train, y_train)
    visualize_classifier(classifier, x_train, y_train)

    y_test_pred = classifier.predict(x_test)
    visualize_classifier(classifier, x_test, y_test)

    # Evaluate classifier performance
    class_names = ['Class-0', 'Class-1', 'Class-2']
    print('\n' + '#' * 40)
    print('\nClassifier performance on training dataset\n')
    print(
        classification_report(y_train,
                              classifier.predict(x_train),
                              target_names=class_names))
    print('#' * 40 + '\n')
    print('#' * 40)
    print('\nClassifier performance on test dataset\n')
    print(
        classification_report(y_test, y_test_pred,
                              target_names=class_names))
    print('#' * 40 + '\n')
# ----- Example #8 -----
# visualize_classifier(classifier, X_test, y_test, 'Test dataset')

# Evaluate classifier performance
# class_names = ['Class-0', 'Class-1', 'Class-2']
# print('\n' + '#'*40)
# print('\nClassifier performance on training dataset\n')
# print(classification_report(y_train, classifier.predict(X_train), target_names=class_names))
# print('#'*40 + '\n')
# print('#'*40)
# print('\nClassifier performance on test dataset\n')
# print(classification_report(y_test, y_test_pred, target_names=class_names))
# print('#'*40 + '\n')

# python3 random_forests.py --classifier-type rf
# python3 random_forests.py --classifier-type erf
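# The --classifier-type flag above implies an argparse front end for
# this script; a minimal sketch of what that parser could look like
# (the description text is an assumption):
import argparse

def build_arg_parser():
    parser = argparse.ArgumentParser(
        description='Classify data using Ensemble Learning techniques')
    parser.add_argument('--classifier-type', dest='classifier_type',
                        required=True, choices=['rf', 'erf'],
                        help="Type of classifier to use; 'rf' or 'erf'")
    return parser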

# -----#-----
# Compute confidence
test_datapoints = np.array([[5, 5], [3, 6], [6, 4], [7, 2], [4, 4], [5, 2]])

print('\nConfidence measure:')
for datapoint in test_datapoints:
    probabilities = classifier.predict_proba([datapoint])[0]
    predicted_class = 'Class-' + str(np.argmax(probabilities))
    print('\nDatapoint :', datapoint)
    print('Predicted class:', predicted_class)

# Visualize the datapoints
visualize_classifier(classifier, test_datapoints, [0] * len(test_datapoints),
                     'Test datapoints')
plt.show()