def _kdtree(*, train, test, x_predict=None, metrics, X, leaf_size=40, metric='minkowski', n_neighbors=5, **kwargs):
    """Classify ``test`` samples by a k-nearest-neighbour majority vote over a KD-tree.

    For more info visit :
    https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KDTree.html#sklearn.neighbors.KDTree

    Parameters
    ----------
    train : tuple ``(features, labels)`` used to build the tree and vote.
            Labels must be non-negative integers (required by ``np.bincount``).
    test : tuple ``(features, labels)`` used to score the model.
    x_predict : optional features to predict for after scoring; default None.
    metrics : one of ``'accuracy'``, ``'f1'``, ``'jaccard'``.
    X : retained for backward compatibility with existing callers. The tree is
        built on ``train[0]`` so that neighbour indices align with ``train[1]``;
        building on ``X`` while voting with ``train[1]`` would misalign labels.
    leaf_size, metric, **kwargs : forwarded to ``KDTree``.
    n_neighbors : number of neighbours in the majority vote (new, default 5).

    Returns
    -------
    tuple
        ``(model_name, score, predictions)`` where ``predictions`` is None when
        ``x_predict`` is None.

    Raises
    ------
    ValueError
        If ``metrics`` is not one of the supported names.
    """
    # BUG FIX: sklearn.neighbors.KDTree is a spatial index, not an estimator --
    # it has no fit()/predict(), so the original `model.fit(...)` raised
    # AttributeError before ever scoring. Build the tree on the training
    # features and classify queries by majority vote among the k nearest.
    model = KDTree(np.asarray(train[0]), leaf_size=leaf_size, metric=metric, **kwargs)
    model_name = 'KD Tree'
    y_train = np.asarray(train[1])

    def _vote(features):
        # Row i of `ind` holds the indices of the k nearest training points
        # for query i; the most frequent label among them wins.
        ind = model.query(np.asarray(features), k=n_neighbors, return_distance=False)
        return np.array([np.argmax(np.bincount(y_train[row])) for row in ind])

    y_hat = _vote(test[0])
    if metrics == 'accuracy':
        accuracy = accuracy_score(test[1], y_hat)
    elif metrics == 'f1':
        accuracy = f1_score(test[1], y_hat)
    elif metrics == 'jaccard':
        accuracy = jaccard_score(test[1], y_hat)
    else:
        # Previously an unknown name crashed later with UnboundLocalError;
        # fail fast with a clear message instead.
        raise ValueError(f"unsupported metrics {metrics!r}; expected 'accuracy', 'f1' or 'jaccard'")
    if x_predict is None:
        return (model_name, accuracy, None)
    y_predict = _vote(x_predict)
    return (model_name, accuracy, y_predict)
def main():
    """Compare a raw KD-tree 3-NN majority vote against KNNClassifier on one split."""
    y, X_train, X_test, y_train, y_test = load_data()

    # use kd-tree: indices of the 3 nearest training points per test sample
    model = KDTree(X_train, metric='euclidean')
    ind = model.query(X_test, k=3, return_distance=False)
    kd_predictions = []
    for i in ind.tolist():
        # NOTE(review): neighbours are indices into X_train, yet labels are
        # looked up in `y`; if `y` covers the full (unsplit) dataset this
        # should probably be y_train -- confirm against load_data().
        counts = np.bincount(np.array(y[i]))
        kd_predictions.append(np.argmax(counts))
    # BUG FIX: the original computed these predictions and then immediately
    # overwrote them with the KNN results, so the KD-tree pass was dead code.
    # Score and report it before running the comparison model.
    kd_accuracy = accuracy_score(y_test, kd_predictions)
    print(f"KD-Tree Accuracy: {kd_accuracy}")

    # knn
    model = KNNClassifier()
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print(f"KNN Accuracy: {accuracy}")
# Sanity-check the index: 5 nearest neighbours of the first 100 samples,
# then of a single all-zeros 784-dimensional query vector.
dist, ind = tree.query(data[:100], k=5)
print(dist[0])
print(ind[0])

dist, ind = tree.query(np.zeros([1, 784], dtype=np.float32), k=5)
print(dist[0])
print(ind[0])

# One column per neighbour rank: distances d0..d4 first, then indices i0..i4
# (comprehension order matches the original literal's key order).
nbrs = {
    f"{prefix}{rank}": columns[:, rank]
    for prefix, columns in (("d", dist), ("i", ind))
    for rank in range(5)
}
csv = pd.DataFrame(nbrs)
csv.to_csv("../../data/mnist_nbrs.csv")

# Rebuild the index with pygrandma and query the first point's 5-NN.
tree = pygrandma.PyGrandma()
tree.set_cutoff(10)
tree.set_scale_base(1.3)
tree.fit(data)
print(tree.knn(data[0], 5))