Example #1
    def __init__(self, dimensions=0, k_neighbors=5, use_kernel=False):
        '''
        Constructor for FaceRecognizer.

        Args (optional):
            dimensions (int): How many principal components to keep.
            k_neighbors (int): How many neighbors to compare against in the
                kNN classifier.
            use_kernel (bool): Whether to use kernel (RBF) PCA.
        '''

        self.pca_model = PCAModel(dimensions=dimensions, use_kernel=use_kernel)
        self.knn_classifier = KNNClassifier(neighbors=k_neighbors)
        self.instances = None
Example #2
def kneighbors(tDataX, tDataY, mineData):
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        tDataX, tDataY, test_size=0.4, random_state=0)

    # X_train: the attributes used to train the model
    # X_test:  the attributes used to test the model
    # y_train: the class labels used to train the model
    # y_test:  the class labels used to test the model

    # A good rule of thumb is k = sqrt(n); it is truncated to an int here
    n_neighbors = int(np.sqrt(len(X_train)))
    weights = 'uniform'

    # we create an instance of Neighbours Classifier and fit the data.
    knn = nClass.KNN(n_neighbors, weights, X_train, y_train)
    test_pred = knn.predictDataSet(X_test)
    accuracy = knn.accuracy(X_test, y_test)

    actual_pred = knn.predictDataSet(mineData)
    resp = {
        'Testset Class': y_test,
        'Predicted Testset Class': test_pred,
        'Classes': actual_pred
    }
    resp['mineAttrs'] = mineData
    resp['accuracy'] = accuracy
    return resp
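For reference, the same split/fit/score flow can be written against scikit-learn's public API. This is a minimal sketch, not the author's code: the custom nClass.KNN wrapper is swapped for sklearn.neighbors.KNeighborsClassifier, and the long-removed sklearn.cross_validation module for sklearn.model_selection.

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

def kneighbors_sklearn(tDataX, tDataY, mineData):
    # Same 60/40 split as above; train_test_split moved to
    # sklearn.model_selection in scikit-learn 0.20.
    X_train, X_test, y_train, y_test = train_test_split(
        tDataX, tDataY, test_size=0.4, random_state=0)

    # Rule of thumb used above: k is roughly sqrt(n)
    n_neighbors = int(np.sqrt(len(X_train)))

    knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights='uniform')
    knn.fit(X_train, y_train)

    return {
        'Testset Class': y_test,
        'Predicted Testset Class': knn.predict(X_test),
        'Classes': knn.predict(mineData),
        'mineAttrs': mineData,
        'accuracy': knn.score(X_test, y_test),
    }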
Example #3
def run(url, header, categories=None, Nan="?", k=3):
    data = read(url, header)
    if categories is not None:
        # Map each listed column through its supplied conversion table
        for key in categories:
            conversion = categorize(data[key], categories[key], Nan=Nan)
            data[key] = data[key].map(conversion)
    else:
        # No tables supplied: build a conversion for each column on the fly
        for head in header:
            if numpy.issubdtype(data[head].dtype, numpy.number):
                conversion = categorize(data[head], build_options_list(data[head], Nan))
                data[head] = data[head].map(conversion)
    # Coerce everything to numeric and hand the raw array to the classifier
    data = data.apply(pandas.to_numeric)
    data = data.to_numpy()
    print("data tested with K =", k, ":",
          KNNClassifier.test_classifier(data[:, :13], data[:, 14], k))
Example #4
    def __init__(self, dimensions=0, k_neighbors=5, use_kernel=False):
        '''
        Constructor for FaceRecognizer.

        Args (optional):
            dimensions (int): How many principal components to keep.
            k_neighbors (int): How many neighbors to compare against in the
                kNN classifier.
            use_kernel (bool): Whether to use kernel (RBF) PCA.
        '''

        self.pca_model = PCAModel(dimensions=dimensions, use_kernel=use_kernel)
        self.knn_classifier = KNNClassifier(neighbors=k_neighbors)
        self.instances = None
Example #5
def main():
    df_beijing = pd.read_csv(BEIJING_PATH)
    df_shenyang = pd.read_csv(SHENYANG_PATH)
    X, Y = prepare_data_and_labels(df_beijing, df_shenyang)

    clf = KNNClassifier(K=5)
    validate(clf, X, Y)

    print("GUANGZHOU")
    df_guangzhou = pd.read_csv(GUANGZHOU_PATH)
    X_test, Y_test = prepare_data_and_labels(df_guangzhou)
    test(clf, X_test, Y_test)

    print("SHANGHAI")
    df_shanghai = pd.read_csv(SHANGHAI_PATH)
    X_test, Y_test = prepare_data_and_labels(df_shanghai)
    test(clf, X_test, Y_test)
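The validate() and test() helpers called above are not part of the snippet. A minimal sketch of what they might look like, assuming the custom KNNClassifier exposes fit(X, y) and predict(X) on NumPy arrays (those names are assumptions, not the example's actual API):

import numpy as np

def validate(clf, X, Y, holdout=0.2, seed=0):
    # Simple holdout validation: fit on one part, report accuracy on the rest
    rng = np.random.RandomState(seed)
    idx = rng.permutation(len(X))
    cut = int(len(X) * (1 - holdout))
    clf.fit(X[idx[:cut]], Y[idx[:cut]])
    preds = clf.predict(X[idx[cut:]])
    print("validation accuracy:", np.mean(preds == Y[idx[cut:]]))

def test(clf, X_test, Y_test):
    # Evaluate the already-fitted classifier on another city's data
    preds = clf.predict(X_test)
    print("test accuracy:", np.mean(preds == Y_test))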
Example #6
def classify(folder, main_folder,num_class, num_feature,F, K):
#for folder in address: 
    feature_num_class = []
    with open(main_folder+"/feature_num_of_classes.csv",'rU') as f:
        reader = csv.reader(f)
        for row in reader:
            feature_num_class.append(row)
    f.close()
    
    binary_class_labels = []
    with open(main_folder+"/binary_class_labels.csv",'rU') as f:
        reader = csv.reader(f)
        for row in reader:
            binary_class_labels.append(row)
    f.close()
    
    feature_vectors = []
    with open(main_folder+"/feature_vectors_trim.csv",'rU') as f:
        reader = csv.reader(f)
        for row in reader:
            feature_vectors.append(row)
    f.close()
    
    result = open(folder+"/knn_results.txt","w")
    class_frequency = []
    feature_weights = []
    training_article_nums = []
    test_article_nums = []
    sample_training_article_nums = []
    sample_test_article_nums = []
    sample_training_vectors = []
    sample_training_labels = []
    sample_test_vectors = []
    sample_test_labels = []
    
    with open(folder+"/training_article_numbers.csv", 'rb') as f:
        reader=csv.reader(f)
        for row in reader:
            training_article_nums.append(row)
    f.close()
    
    with open(folder+"/test_article_numbers.csv", 'rb') as f:
        reader=csv.reader(f)
        for row in reader:
            test_article_nums.append(row)
    f.close()    

    with open(main_folder+"/class_frequency.csv", 'rb') as f:
        reader = csv.reader(f)
        for row in reader:
            class_frequency.append(row)
    f.close()
    with open(main_folder+"/weight_feature_class.csv", 'rU') as f:
        reader = csv.reader(f)
        for row in reader:
            feature_weights.append(row)
    f.close() 
    
    if False: from random import shuffle; X=list(training_article_nums[0]); shuffle(training_article_nums[0])
    for k in range(F):
        #sample_training = [x for i, x in enumerate(article_number) if i % K != k]
        sample_training = [x for i, x in enumerate(training_article_nums[0]) if i % F == k]
    with open(folder+"/sample_training_article_num.csv", 'wb') as f:
        writer=csv.writer(f)
        writer.writerow(sample_training)
    f.close()
    
    if False: from random import shuffle; X=list(test_article_nums[0]); shuffle(test_article_nums[0])
    for k in range(F):
        #sample_training = [x for i, x in enumerate(article_number) if i % K != k]
        sample_test = [x for i, x in enumerate(test_article_nums[0]) if i % F == k]
    with open(folder+"/sample_test_article_num.csv", 'wb') as f:
        writer=csv.writer(f)
        writer.writerow(sample_test)
    f.close()
    
    with open(folder+"/sample_training_article_num.csv", 'wb') as f:
        writer=csv.writer(f)
        writer.writerow(sample_training)
    f.close()
    
    with open(folder+"/sample_training_vectors.csv", 'wb') as f:
        writer=csv.writer(f)
        for item in sample_training:
            sample_training_vectors.append(feature_vectors[int(item)])
            writer.writerow(feature_vectors[int(item)])
    f.close() 
    
    with open(folder+"/sample_training_labels.csv", 'wb') as f:
        writer=csv.writer(f)
        for item in sample_training:
            sample_training_labels.append(binary_class_labels[int(item)])
            writer.writerow(binary_class_labels[int(item)])
    f.close()
    
    with open(folder+"/sample_test_vectors.csv", 'wb') as f:
        writer=csv.writer(f)
        for item in sample_test:
            sample_test_vectors.append(feature_vectors[int(item)])
            writer.writerow(feature_vectors[int(item)])
    f.close() 
    
    with open(folder+"/sample_test_labels.csv", 'wb') as f:
        writer=csv.writer(f)
        for item in sample_test:
            sample_test_labels.append(binary_class_labels[int(item)])
            writer.writerow(binary_class_labels[int(item)])
    f.close()
    modeling_time = 0
    classifying_time=0
    inversed_classified_labels = []

    for i in range(0,num_class):
        temp = []
        feature_num = feature_num_class[i]
        the_class_frequency = class_frequency[0][i]
        start_time = time.time()
        class_column = []
        for item in sample_training_labels :
            if item[0]=='':
                break
            class_column.append(item[i])
             
        knn=KNNClassifier.train(sample_training_vectors, class_column, K, typecode=None)
        modeling_time+=(time.time()-start_time)
        #print "modeling time = ", modeling_time, "s"
        #print "classifier number = ",i+1       
        for article in sample_test_vectors:
            start_time = time.time()
            if article[0]=='':
                break            
            label = KNNClassifier.classify(knn, article,feature_num, the_class_frequency,None , feature_weights[0])
            classifying_time += (time.time()-start_time)
            #print "classifying time = ", classifying_time,"s"
            #print "class number = ",i," label = ", label
            temp.append(label)
        print( i+1,"out of ",num_class," has been classified") 
        inversed_classified_labels.append(temp)  
    print("modeling time = ", modeling_time,"s", file = result)
    print("number of knn classifiers = ", num_class, file = result)
    print("classifying time = ", classifying_time, "s", file = result)
    #inverse-the inversed label matrix
    num_article = len(inversed_classified_labels[0])
    print("calssified article number = ", num_article, file = result)
    with open(folder+"/knn_classified_sample_labels.csv", 'wb') as f:
        writer = csv.writer(f)
        for i in range(0,num_article):
            temp = []
            for j in range(0,num_class):
                temp.append(inversed_classified_labels[j][i])
            writer.writerow(temp)
    f.close()
    calculate_accuracy()
Example #7
fig, ax = plt.subplots(
    nrows=2,
    ncols=5,
    sharex=True,
    sharey=True,
)
ax = ax.flatten()
for i in range(10):
    img = X_train[y_train == i][0]
    ax[i].imshow(img, cmap='Greys', interpolation='nearest')
ax[0].set_xticks([])
ax[0].set_yticks([])
plt.tight_layout()
plt.show()
X_train = v.NormalVectorize(X_train)
X_test = v.NormalVectorize(X_test)
"""Average Classifier"""
AC = AverageClassifier.Train(X_train, y_train)
print("Average Classifier Accurary:",
      AverageClassifier.Test(X_test, y_test, AC) * 100, "%")
"""KNN Classifier"""
print("KNN Predict Classifier:")
print("KNN")
KNNClassifier.Test(X_test, y_test, X_train, y_train)
print("KNN with Lib")
KNN = KNNClassifier.KNNbyLib(X_train, y_train)
KNNClassifier.KNNByLibTest(X_test, y_test, KNN)
"""ANN Classifier"""
ANN = ANNClassifier.Train(X_train, y_train)
print("ANN Classifier Accurary:")
ANNClassifier.Test(X_test, y_test, ANN)
Example #8
import numpy as np
from KNNClassifier import KNNClassifier  # module assumed to expose a class of the same name

import matplotlib.pyplot as plt
if __name__ == '__main__':
    image_size = 28  # width and length
    no_of_different_labels = 10  #  i.e. 0, 1, 2, 3, ..., 9
    image_pixels = image_size * image_size
    data_path = "/mnist/"
    train_data = np.loadtxt(data_path + "mnist_train.csv", delimiter=",")
    test_data = np.loadtxt(data_path + "mnist_test.csv", delimiter=",")
    fac = 0.99 / 255
    train_imgs = np.asfarray(train_data[:, 1:]) * fac + 0.01
    test_imgs = np.asfarray(test_data[:, 1:]) * fac + 0.01

    train_labels = np.asfarray(train_data[:, :1])
    test_labels = np.asfarray(test_data[:, :1])

    knn = KNNClassifier(distance='Euclidean', K=5)
    knn.fit(train_imgs, train_labels)
    results = knn.predict(test_imgs)

    #
    # for i in range(10):
    #     img = train_imgs[i].reshape((28,28))
    #     plt.imshow(img, cmap="Greys")
    #     plt.show()

    from sklearn.metrics import confusion_matrix
    cm = confusion_matrix(test_labels, results)
    print(cm)
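Overall accuracy can also be read off the confusion matrix computed above; a short continuation of the same script:

    # Correct predictions sit on the diagonal of the confusion matrix
    accuracy = np.trace(cm) / cm.sum()
    print("kNN accuracy on the MNIST test set:", accuracy)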
Example #9
###########################################
## KNN test code

from sklearn.datasets import load_iris
from sklearn.utils import shuffle

iris_X, iris_y = load_iris(return_X_y=True)
iris_X, iris_y = shuffle(iris_X, iris_y)
X_train = iris_X[:-30]
X_test = iris_X[-30:]
y_train = iris_y[:-30]
y_test = iris_y[-30:]

from KNNClassifier import *

knn = KNNClassifier("eucledean", 10)
knn.buildModel(X_train, y_train)
knn.evaluateModel(X_test, y_test)
knn.showLabel(X_test[5], load_iris())

#########################################
## Naive Bayes test code

from sklearn.datasets import load_iris
from sklearn.utils import shuffle

iris_X, iris_y = load_iris(return_X_y=True)
iris_X, iris_y = shuffle(iris_X, iris_y)
X_train = iris_X[:-30]
X_test = iris_X[-30:]
y_train = iris_y[:-30]
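For comparison, the same iris experiment with scikit-learn's built-in kNN (an independent sketch, not the custom KNNClassifier above), including looking up the predicted species name for a single sample the way showLabel does:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

iris = load_iris()
Xtr, Xte, ytr, yte = train_test_split(iris.data, iris.target,
                                      test_size=30, random_state=0)

sk_knn = KNeighborsClassifier(n_neighbors=10, metric='euclidean')
sk_knn.fit(Xtr, ytr)
print("accuracy:", sk_knn.score(Xte, yte))

# Species name for one test sample, mirroring knn.showLabel(X_test[5], load_iris())
print("predicted species:", iris.target_names[sk_knn.predict(Xte[5:6])[0]])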
Example #10
class FaceRecognizer:
    def __init__(self, dimensions=0, k_neighbors=5, use_kernel=False):
        '''
        Constructor for FaceRecognizer.

        Args (optional):
            dimensions (int): How many principal components to keep.
            k_neighbors (int): How many neighbors to compare against in the
                kNN classifier.
            use_kernel (bool): Whether to use kernel (RBF) PCA.
        '''

        self.pca_model = PCAModel(dimensions=dimensions, use_kernel=use_kernel)
        self.knn_classifier = KNNClassifier(neighbors=k_neighbors)
        self.instances = None

    def train(self, instances):
        '''
        Trains the recognizer with a set of faces.

        Args:
            instances (list<tuple<int, numpy.ndarray>>): List of label/face
                data pairs.
        '''

        self.instances = instances

        # Stack all of the faces together

        faces_list = list()
        for instance in instances:
            faces_list.append(instance[1])

        faces = np.vstack(faces_list).T

        # Learn principal components

        self.pca_model.fit(faces)

        # Add each class to the kNN classifier

        for instance in instances:
            label = instance[0]
            face = instance[1]
            t_face = self.pca_model.transform(face)
            self.knn_classifier.add_sample(label, t_face)

    def fit_knn(self):
        '''
        Fits the kNN classifier with the current instances.
        '''

        if self.instances is None:
            raise RuntimeError('FaceRecognizer has no instances')

        self.knn_classifier.reset()

        for instance in self.instances:
            label = instance[0]
            face = instance[1]
            t_face = self.pca_model.transform(face)
            self.knn_classifier.add_sample(label, t_face)

    def classify(self, face):
        '''
        Classifies a given face from the trained set.

        Args:
            face (numpy.ndarray): The face to classify.
        Returns:
            int, the class the face best belongs to.
        '''

        t_face = self.pca_model.transform(face)
        return self.knn_classifier.classify(t_face)

    def set_dimensions(self, dimensions):
        '''
        Sets the number of dimensions to use from PCA.

        Args:
            dimensions (int): The new number of dimensions.
        '''

        self.pca_model.dimensions = dimensions

        if self.instances is not None:
            self.fit_knn()

    def set_k_neighbors(self, k):
        '''
        Sets k for kNN classifier.

        Args:
            k (int): The new k for the classifier.
        '''

        self.knn_classifier.neighbors = k

    def set_kernel_variance(self, variance):
        '''
        Sets the variance for the RBF kernel and retrains.

        Args:
            variance (float): The new variance.
        '''

        self.pca_model.variance = variance

        if self.instances is not None and self.pca_model.use_kernel:
            self.train(self.instances)
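A sketch of how this class might be driven end to end. PCAModel and KNNClassifier are assumed to come from the same project as FaceRecognizer, and the random vectors below are stand-ins for flattened grayscale face images:

import numpy as np

# Hypothetical usage: three random "faces" for each of five people
rng = np.random.default_rng(0)
instances = [(person_id, rng.random(64 * 64))
             for person_id in range(5)
             for _ in range(3)]

recognizer = FaceRecognizer(dimensions=10, k_neighbors=3)
recognizer.train(instances)

probe = instances[0][1]
print("predicted person:", recognizer.classify(probe))

# Changing the PCA rank refits the kNN classifier on the stored instances
recognizer.set_dimensions(5)
print("predicted person (rank 5):", recognizer.classify(probe))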
Example #11
class FaceRecognizer:

    def __init__(self, dimensions=0, k_neighbors=5, use_kernel=False):
        '''
        Constructor for FaceRecognizer.

        Args (optional):
            dimensions (int): How many principal components to keep.
            k_neighbors (int): How many neighbors to compare against in the
                kNN classifier.
            use_kernel (bool): Whether to use kernel (RBF) PCA.
        '''

        self.pca_model = PCAModel(dimensions=dimensions, use_kernel=use_kernel)
        self.knn_classifier = KNNClassifier(neighbors=k_neighbors)
        self.instances = None


    def train(self, instances):
        '''
        Trains the recognizer with a set of faces.

        Args:
            instances (list<tuple<int, numpy.ndarray>>): List of label/face
                data pairs.
        '''

        self.instances = instances

        # Stack all of the faces together

        faces_list = list()
        for instance in instances:
            faces_list.append(instance[1])

        faces = np.vstack(faces_list).T

        # Learn principal components

        self.pca_model.fit(faces)

        # Add each class to the kNN classifier

        for instance in instances:
            label = instance[0]
            face  = instance[1]
            t_face = self.pca_model.transform(face)
            self.knn_classifier.add_sample(label, t_face)


    def fit_knn(self):
        '''
        Fits the kNN classifier with the current instances.
        '''

        if self.instances is None:
            raise RuntimeError('FaceRecognizer has no instances')

        self.knn_classifier.reset()

        for instance in self.instances:
            label = instance[0]
            face  = instance[1]
            t_face = self.pca_model.transform(face)
            self.knn_classifier.add_sample(label, t_face)


    def classify(self, face):
        '''
        Classifies a given face from the trained set.

        Args:
            face (numpy.ndarray): The face to classify.
        Returns:
            int, the class the face best belongs to.
        '''

        t_face = self.pca_model.transform(face)
        return self.knn_classifier.classify(t_face)


    def set_dimensions(self, dimensions):
        '''
        Sets the number of dimensions to use from PCA.

        Args:
            dimensions (int): The new number of dimensions.
        '''

        self.pca_model.dimensions = dimensions

        if self.instances is not None:
            self.fit_knn()


    def set_k_neighbors(self, k):
        '''
        Sets k for kNN classifier.

        Args:
            k (int): The new k for the classifier.
        '''

        self.knn_classifier.neighbors = k


    def set_kernel_variance(self, variance):
        '''
        Sets the variance for the RBF kernel and retrains.

        Args:
            variance (float): The new variance.
        '''

        self.pca_model.variance = variance

        if self.instances is not None and self.pca_model.use_kernel:
            self.train(self.instances)