예제 #1
0
파일: run.py 프로젝트: macieg-b/k-means
import random

import numpy as np
from sklearn import linear_model
from sklearn.metrics import r2_score

from model import Calculation, DataManager, KMeans, PlotGenerator

# --- Part 1: k-means clustering of the iris data set -----------------------
# DataManager.load_data returns the samples (x) and their labels (y).
x, y = DataManager.load_data('data/iris.arff')
# PlotGenerator.data_set_2d(x, y)
# PlotGenerator.data_set_3d(x, y)

# Three clusters; 'MAH' selects the distance function
# (presumably Mahalanobis -- TODO confirm against model.KMeans).
k_means = KMeans(x, y, 3, d_function='MAH')
k_means.random_centers()
# NOTE(review): C looks like the cluster centers and CX the per-sample
# cluster assignment -- confirm against KMeans.process.
C, CX = k_means.process(x)
# PlotGenerator.clusters_2d(CX, x)
# PlotGenerator.clusters_3d(CX, x)

# Evaluate the clustering: quantization error first, then accuracy after
# Calculation.proper_classes has re-mapped CX using the labels y.
error = Calculation.quantization_error(x, C, CX)
CX = Calculation.proper_classes(CX, y)
accuracy = Calculation.accuracy(CX, y)
print("Error: %f" % error)
print("Accuracy: %f" % accuracy)

# --- Part 2: load the banknote authentication data set ---------------------
# Each line is comma-separated: every field but the last is a feature,
# the last field is the label. x and y are deliberately rebound here.
x = list()
y = list()
# Use a context manager so the file handle is closed deterministically
# (the bare `for line in open(...)` form left it open).
with open("data/banknote_authentication_set.txt") as data_file:
    for line in data_file:
        # Skip blank lines (e.g. a trailing empty line at end of file),
        # which would otherwise make float('') raise ValueError.
        if not line.strip():
            continue
        tmp_array = line.split(',')
        x.append([float(i) for i in tmp_array[0:-1]])
        y.append(float(tmp_array[-1].replace('\n', '')))

# PlotGenerator.data_set_2d(x, y)
PlotGenerator.data_set_3d(x, y)
예제 #2
0
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

from model import DataManager, Classifier, PlotGenerator

# Passed as the second argument to every Classifier built below.
REPETITION = 50

# Load the diabetes data set and convert both parts to numpy arrays.
probes, result = DataManager.load_data('data/diabetes.arff')
x, y = np.array(probes), np.array(result)


def _run_classifier(estimator):
    """Wrap *estimator* in a Classifier over (x, y) and compute its indicators.

    Returns the Classifier after calculate_indicators() has been called on it.
    """
    wrapped = Classifier(estimator, REPETITION, x, y)
    wrapped.calculate_indicators()
    return wrapped


# Each model is constructed and evaluated before the next one is created,
# in the same order as before.
bayes = _run_classifier(MultinomialNB())
logistic_regression = _run_classifier(LogisticRegression())
kneighbours_classifier = _run_classifier(KNeighborsClassifier(10))
mlp_classifier = _run_classifier(MLPClassifier())

# NOTE(review): mlp_classifier is not part of this list within the visible
# portion of the script -- confirm whether it is appended further down.
classifiers_array = [bayes, logistic_regression, kneighbours_classifier]