Example no. 1
0
 def get_knn_probability(k, training_data, training_data_class, test_data):
     """ Use knn to compute probabilities of test_data belonging to training_data_class
         k: number of nearest neighbors
         training_data: training training_data instances
         training_data_class: classes of each training training_data instance
         test_data: test_data to classify
         returns: probabilities of each input instance belonging to each class
     """
     num_inputs = np.shape(test_data)[0]
     unique_classes = np.unique(training_data_class)
     unique_class_to_index = {}
     for i in range(len(unique_classes)):
         unique_class_to_index[unique_classes[i]] = i
     num_classes = len(unique_classes)
     if False:
         print 'num_inputs =', num_inputs
         print 'num_classes =', num_classes
         print 'training_data_class =', training_data_class
         print 'unique_classes =', unique_classes
         print 'unique_class_to_index =', unique_class_to_index
         exit()
         print 'training_data =', training_data
         
         print 'test_data =', test_data
     probabilites = np.zeros((num_inputs,num_classes),dtype = 'f')
     
     if USE_KD_TREE:
         print 'Training kd tree'
         kd_tree = KDTree(training_data)
         print 'Done training kd, tree'
 
     for n in range(num_inputs):
     
         if USE_KD_TREE:
             distances, indices = kd_tree.query(test_data[n,:], k=k)
         else:
             distances = np.sqrt(np.sum((training_data - test_data[n,:])**2, axis = 1))
             indices = np.argsort(distances, axis = 0)
 
         #print 'i =', test_data[n,:]
         #print 'd =', distances
         #print 'indices =', indices
 
         classes = training_data_class[indices[:k]]
         if False:
             print 'classes =', classes
            
         class_totals = np.zeros(num_classes)
         for i in range(classes.shape[0]):
             class_totals[unique_class_to_index[classes[i]]] += 1
         #print 'class_totals =', class_totals
         for i in range(class_totals.shape[0]):
             probabilites[n,i] = class_totals[i]/classes.shape[0]
         
     return unique_classes, probabilites
Example no. 2
0
def get_knn(k, training_data_class, test_data, kd_tree):
    """Classify test points by majority vote among the k nearest neighbours.

    Args:
        k: number of nearest neighbours to consult.
        training_data_class: array of class labels for the training
            instances the kd-tree was built from. Labels are assumed to be
            non-negative integers, since they index a vote-count array.
        test_data: (n_test, n_features) array of points to classify.
        kd_tree: a KD-tree built over the training data, exposing
            query(point, k) -> (distances, indices).

    Returns:
        Length-n_test float array holding the winning class label for each
        test point (ties go to the smallest label, via argmax).
    """
    num_inputs = np.shape(test_data)[0]
    closest = np.zeros(num_inputs)

    for n in range(num_inputs):
        _, indices = kd_tree.query(test_data[n, :], k=k)
        # query() returns scalars when k == 1; normalise to 1-D arrays.
        indices = np.atleast_1d(indices)

        neighbour_classes = training_data_class[indices[:k]]
        unique = np.unique(neighbour_classes)
        if len(unique) == 1:
            # Unanimous vote: take the scalar element, not the size-1 array.
            closest[n] = unique[0]
        else:
            # Tally votes; labels index directly into the counts array.
            counts = np.zeros(int(max(unique)) + 1)
            for idx in indices[:k]:
                counts[training_data_class[idx]] += 1
            closest[n] = np.argmax(counts)

    return closest