class Number_recognition:
    """KNN digit classifier backed by MNIST-format (IDX) data files."""

    def __init__(self, test_images, test_labels, k):
        # Paths to the IDX image/label files; load_train() replaces these
        # attributes with the loaded numpy arrays.
        self.test_images = test_images
        self.test_labels = test_labels
        # Number of neighbours used by the KNN classifier.
        self.k = k

    def load(self, path_img, path_lbl):
        """Parse an IDX image file and its matching IDX label file.

        Parameters
        ----------
        path_img: str
            Path to the IDX image file (magic number 2051).
        path_lbl: str
            Path to the IDX label file (magic number 2049).

        Returns
        -------
        (images, labels_result): images is a float64 array of shape
        (size, rows*cols); labels_result is a float64 vector of length size.

        Raises
        ------
        ValueError when either file's magic number does not match.
        """
        with open(path_lbl, 'rb') as lbl_file:
            magic, size = struct.unpack(">II", lbl_file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, '
                    'got %d' % magic)
            # Vectorized copy: the original filled labels_result one element
            # at a time in a Python loop.
            labels_result = np.frombuffer(
                lbl_file.read(), dtype=np.uint8)[:size].astype(np.float64)

        with open(path_img, 'rb') as img_file:
            magic, size, rows, cols = struct.unpack(">IIII", img_file.read(16))
            print("rows: " + str(rows) + "  cols: " + str(cols))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, '
                        'got %d' % magic)
            image_data = np.frombuffer(img_file.read(), dtype=np.uint8)

        # One astype + reshape instead of a per-image copy loop.
        images = image_data[:size * rows * cols].astype(
            np.float64).reshape(size, rows * cols)

        return images, labels_result

    def load_train(self):
        """Load the data files and train the KNN model on them."""
        ims, labels = self.load(self.test_images, self.test_labels)

        self.test_images = ims
        self.test_labels = labels
        labels_numbers = MulticlassLabels(self.test_labels)
        # Shogun expects one sample per column, hence the transpose.
        feats = RealFeatures(self.test_images.T)
        dist = EuclideanDistance()
        self.knn = KNN(self.k, dist, labels_numbers)
        self.knn.train(feats)

    def predict(self, image):
        """Classify the given image(s); samples become columns via .T."""
        feats_test = RealFeatures(image.T)
        pred = self.knn.apply_multiclass(feats_test)
        return pred[:]
def run_knn(Xtrain, Ytrain, Xtest, Ytest):
    """Train a KNN classifier on (Xtrain, Ytrain), classify Xtest, and
    report accuracy plus a confusion matrix.

    Uses an LMNN-learned metric when a serialized model file is present on
    disk, otherwise plain Euclidean distance.  Relies on the module-level
    globals K (number of neighbours) and NCLASSES (number of classes).

    Returns the confusion matrix produced by build_confusion_matrix().
    """
    prod_features = RealFeatures(Xtrain)
    prod_labels = MulticlassLabels(Ytrain)
    test_features = RealFeatures(Xtest)

    if os.path.exists(".lmnn_model30000_5_reg05_cor20"):
        # A previously trained LMNN transform exists: deserialize it and
        # use its learned distance metric.
        print("Using LMNN distance")
        lmnn = LMNN()
        sf = SerializableAsciiFile(".lmnn_model30000_5_reg05_cor20", 'r')
        lmnn.load_serializable(sf)

        # Wide print options so the confusion matrix below is shown in full.
        np.set_printoptions(precision=1, threshold=1e10, linewidth=500)

        dist = lmnn.get_distance()
    else:
        dist = EuclideanDistance()

    # classifier
    knn = KNN(K, dist, prod_labels)
    parallel = knn.get_global_parallel()
    parallel.set_num_threads(4)
    knn.set_global_parallel(parallel)
    knn.train(prod_features)

    print("Classifying test set...")
    pred = knn.apply_multiclass(test_features)

    print("Accuracy = %2.2f%%" % (100 * np.mean(pred == Ytest)))

    cm = build_confusion_matrix(Ytest, pred, NCLASSES)
    print("Confusion matrix: ")
    print(cm)
    return cm
# load LMNN
# Top-level script: classify the test set with a probability-calibrated KNN.
# Relies on module globals defined earlier in the file: k, prod_labels,
# prod_features, test_features, load_confusion_matrix, predict_class_prob,
# print_prediction_output.
if os.path.exists(".lmnn_model30000_5_reg05_cor20"):
    # A serialized LMNN model exists on disk: use its learned metric.
    sf = SerializableAsciiFile(".lmnn_model30000_5_reg05_cor20", 'r')
    lmnn = LMNN()
    lmnn.load_serializable(sf)

    # Report sparsity of the learned linear transform's diagonal.
    diagonal = np.diag(lmnn.get_linear_transform())
    print('%d out of %d elements are non-zero.' % (np.sum(diagonal != 0), diagonal.size))
    #print diagonal
    dist = lmnn.get_distance()
else:
    # No trained LMNN model: fall back to the plain Euclidean metric.
    dist = EuclideanDistance()

# Previously saved confusion matrix, used below to turn hard predictions
# into class probabilities.
cm = load_confusion_matrix()
print cm

# classifier
knn = KNN(k, dist, prod_labels)
parallel = knn.get_global_parallel()
parallel.set_num_threads(4)
knn.set_global_parallel(parallel)
knn.train(prod_features)

print "Classifying test set..."
pred = knn.apply_multiclass(test_features)

results = predict_class_prob(pred, cm)
print_prediction_output(results)
# Beispiel #4
# 0
def evaluate(labels,
             feats,
             params=None,
             Nsplit=2):
    """
        Run cross-validation to evaluate the KNN.

        Parameters
        ----------
        labels: 2d array
            Data set labels.
        feats: array
            Data set feats.
        params: dictionary, optional
            Search scope parameters: 'n_neighbors' (int),
            'use_cover_tree' (the string 'True' or 'False') and
            'dist' ('Euclidean' or 'Manhattan').  Defaults to
            {'n_neighbors': 2, 'use_cover_tree': 'True', 'dist': 'Manhattan'}.
        Nsplit: int, default = 2
            The n for n-fold cross validation.

        Returns
        -------
        accuracy: array of shape (Nsplit,)
            Held-out accuracy of each fold.
    """
    # Build the default dict per call instead of using a mutable default
    # argument shared across calls.
    if params is None:
        params = {
            'n_neighbors': 2,
            'use_cover_tree': 'True',
            'dist': 'Manhattan',
        }
    k = params.get('n_neighbors')
    use_cover_tree = params.get('use_cover_tree') == 'True'
    # BUG FIX: the original called params.get('dist' == 'Euclidean'),
    # i.e. params.get(False) -> None, so the Euclidean branch was
    # unreachable and Manhattan was always used.
    if params.get('dist') == 'Euclidean':
        func_dist = EuclideanDistance
    else:
        func_dist = ManhattanMetric

    split = CrossValidationSplitting(labels, Nsplit)
    split.build_subsets()

    accuracy = np.zeros(Nsplit)
    acc_train = np.zeros(accuracy.shape)
    time_test = np.zeros(accuracy.shape)
    for i in range(Nsplit):
        idx_train = split.generate_subset_inverse(i)
        idx_test = split.generate_subset_indices(i)

        # Restrict features/labels to the training fold.
        feats.add_subset(idx_train)
        labels.add_subset(idx_train)

        dist = func_dist(feats, feats)
        knn = KNN(k, dist, labels)
        knn.set_store_model_features(True)
        if use_cover_tree:
            knn.set_knn_solver_type(KNN_COVER_TREE)
        else:
            knn.set_knn_solver_type(KNN_BRUTE)
        knn.train()

        evaluator = MulticlassAccuracy()
        pred = knn.apply_multiclass()
        acc_train[i] = evaluator.evaluate(pred, labels)

        # Swap to the held-out fold for testing.
        feats.remove_subset()
        labels.remove_subset()
        feats.add_subset(idx_test)
        labels.add_subset(idx_test)

        # Average per-sample classification time for this fold.
        t_start = time.clock()
        pred = knn.apply_multiclass(feats)
        time_test[i] = (time.clock() - t_start) / labels.get_num_labels()

        accuracy[i] = evaluator.evaluate(pred, labels)

        feats.remove_subset()
        labels.remove_subset()
    print(accuracy.mean())
    return accuracy