def _nearestneighbors(*,
                      train,
                      test,
                      x_predict=None,
                      metrics,
                      n_neighbors=5,
                      algorithm='auto',
                      leaf_size=30,
                      metric='minkowski',
                      p=2,
                      metric_params=None,
                      n_jobs=None):
    """
    Fits a k-nearest-neighbors classifier and scores it with the chosen metric.

    Note: sklearn's NearestNeighbors class is unsupervised and has no
    predict() method, so the supervised KNeighborsClassifier is used here
    (it also takes no `radius` parameter). For more info visit:
    https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html
    """

    model = KNeighborsClassifier(n_neighbors=n_neighbors,
                                 algorithm=algorithm,
                                 leaf_size=leaf_size,
                                 metric=metric,
                                 p=p,
                                 metric_params=metric_params,
                                 n_jobs=n_jobs)
    model.fit(train[0], train[1])
    model_name = 'Nearest Neighbors'
    y_hat = model.predict(test[0])

    if metrics == 'accuracy':
        accuracy = accuracy_score(test[1], y_hat)
    elif metrics == 'f1':
        accuracy = f1_score(test[1], y_hat)
    elif metrics == 'jaccard':
        accuracy = jaccard_score(test[1], y_hat)
    else:
        raise ValueError("metrics must be 'accuracy', 'f1' or 'jaccard'")

    if x_predict is None:
        return (model_name, accuracy, None)

    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
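A minimal usage sketch for the wrapper above. It assumes load_iris and train_test_split are imported from sklearn alongside the metric functions the wrapper uses; the (X, y) tuple layout for `train` and `test` follows the indexing in the function body and is not a library convention.

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
name, acc, _ = _nearestneighbors(train=(X_train, y_train),
                                 test=(X_test, y_test),
                                 metrics='accuracy')
print(name, acc)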
Example #2
def wine_cross():
    wine = datasets.load_wine()
    x = wine.data
    y = wine.target
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.3, random_state=42)

    # The unsupervised NearestNeighbors class cannot classify, and comparing
    # KMeans cluster ids against class labels is not an accuracy measure;
    # KNeighborsClassifier makes the accuracy_score below meaningful.
    clf = KNeighborsClassifier(n_neighbors=5)
    clf.fit(x_train, y_train)

    y_pred = clf.predict(x_test)
    print('accuracy: ', accuracy_score(y_test, y_pred))
Example #3
from sklearn.neighbors import LocalOutlierFactor

def fixed_outlier_detector_by_LOF(feature, outlier_fraction):
	"""
	Takes training data `feature` and returns the indices of the outliers
	in it, assuming a contamination (outlier) fraction of `outlier_fraction`.
	"""
	# NearestNeighbors takes no `contamination` argument and has no predict();
	# LocalOutlierFactor is the LOF estimator the function name refers to.
	model = LocalOutlierFactor(contamination=outlier_fraction)
	y_predict = model.fit_predict(feature)  # -1 marks outliers, 1 marks inliers
	return [i for i, y in enumerate(y_predict) if y == -1]
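A short usage sketch for the detector above, assuming numpy is imported as np; the shifted points are purely illustrative:

rng = np.random.RandomState(0)
X = rng.randn(100, 2)
X[:3] += 6  # move three points far from the cloud so they become outliers
print(fixed_outlier_detector_by_LOF(X, outlier_fraction=0.03))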
Example #4
    def predict(self):
        """
        Trains a classifier from the scikit-learn machine learning library
        (https://scikit-learn.org), passes the trained model the feature set,
        and obtains the predicted y values from it.

        It then compares the predicted values against the y_test values
        passed in and returns the accuracy.
        """
        # KNeighborsClassifier, not the unsupervised NearestNeighbors,
        # supports fit(X, y) followed by predict().
        algorithm = KNeighborsClassifier(n_neighbors=2)
        algorithm.fit(self.X_train, self.y_train)
        y_pred = list(algorithm.predict(self.X_test))
        self.acc = OneHotPredictor.get_accuracy(y_pred, self.y_test)
        return self.acc
Example #5
    def knn(self, partitions, predictors, outcome):
        # Sweep k to find the optimal number of neighbors on the
        # validation partition. (The unused NearestNeighbors warm-up
        # model and test individual from the original were dead code.)
        results = []
        for k in range(1, 40):
            knn = KNeighborsClassifier(n_neighbors=k).fit(
                partitions['train_X'][predictors], partitions['train_y'])
            results.append({
                'k': k,
                # the validation data lives in `partitions`, not `predictors`
                'accuracy': accuracy_score(
                    partitions['valid_y'],
                    knn.predict(partitions['valid_X'][predictors])),
            })
        return results
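Picking the best k from the accuracy sweep above is then one line; a sketch, assuming `results` is the list the method returns:

best = max(results, key=lambda r: r['accuracy'])
print('best k:', best['k'], 'validation accuracy:', best['accuracy'])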
Example #6
# K-NN
import numpy as np

from sklearn import datasets
from sklearn.model_selection import train_test_split

irisDataset = datasets.load_iris()

irisFeatures = irisDataset.data
irisTarget = irisDataset.target

xTrain, xTest, yTrain, yTest = train_test_split(irisFeatures,
                                                irisTarget,
                                                test_size=0.2)

from sklearn.neighbors import KNeighborsClassifier

# 'ballTree' is not a valid algorithm name, and NearestNeighbors cannot
# classify; use KNeighborsClassifier with algorithm='ball_tree'.
knn = KNeighborsClassifier(n_neighbors=3, algorithm='ball_tree')
knn.fit(xTrain, yTrain)
yPred = knn.predict(xTest)

from sklearn.metrics import confusion_matrix, f1_score

# don't shadow the imported functions with the result variables
cm = confusion_matrix(yTest, yPred)
f1 = f1_score(yTest, yPred, average='weighted')

print("Confusion Matrix: \n", cm)
print("F1-Score: ", f1)
Example #7
bumpy_slow = [
    features_train[ii][1] for ii in range(0, len(features_train))
    if labels_train[ii] == 1
]

#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(bumpy_slow, grade_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()
################################################################################

### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary
from sklearn.neighbors import KNeighborsClassifier
# NearestNeighbors has no predict(); the exercise needs the supervised
# KNeighborsClassifier.
clf = KNeighborsClassifier(n_neighbors=2)
clf = clf.fit(features_train, labels_train)
pred = clf.predict(features_test)

from sklearn.metrics import accuracy_score
acc = accuracy_score(labels_test, pred)
print(acc)

try:
    prettyPicture(clf, features_test, labels_test)
except NameError:
    pass
Example #8
def run(tr, ts):
    Xtr = tr[['lat', 'lon']].to_numpy()  # DataFrame.as_matrix() was removed in pandas 1.0
    Xts = ts[['lat', 'lon']].to_numpy()

    print('check outliers...')
    m = NearestNeighbors(n_neighbors=10).fit(Xtr)

    # mean distance to the nearest neighbors; for the training set the first
    # column is the point itself (distance 0), so it is skipped
    dtr, _ = m.kneighbors(Xtr)
    dtr = np.mean(dtr[:, 1:], 1)

    dts, _ = m.kneighbors(Xts)
    dts = np.mean(dts[:, :-1], 1)

    tr_inliers = dtr < 0.02
    ts_inliers = dts < 0.02

    print('clustering all points...')
    k_all = 10
    m = KMeans(k_all)
    _Ctr = m.fit_predict(Xtr[tr_inliers])
    _Cts = m.predict(Xts[ts_inliers])

    # outliers = cluster 0
    _Ctr += 1
    Ctr = np.zeros(len(Xtr), int)
    Ctr[tr_inliers] = _Ctr

    _Cts += 1
    Cts = np.zeros(len(Xts), int)
    Cts[ts_inliers] = _Cts

    Dtr = m.transform(Xtr)
    Dts = m.transform(Xts)

    # one hot encoding
    Ctr = np.asarray([[int(c == i) for c in Ctr] for i in range(k_all + 1)]).T
    Cts = np.asarray([[int(c == i) for c in Cts] for i in range(k_all + 1)]).T

    Xtr_ = np.c_[Ctr, Dtr]
    Xts_ = np.c_[Cts, Dts]

    print('clustering across revenue classes...')
    k_across = 3
    y = tr['y'].to_numpy()
    Dtrs = []
    Dtss = []
    for klass in range(1, 6):
        m = KMeans(k_across)
        m.fit(Xtr[np.logical_and(tr_inliers, y == klass)])
        Dtrs.append(np.amin(m.transform(Xtr), 1))
        Dtss.append(np.amin(m.transform(Xts), 1))

    Dtrs = np.asarray(Dtrs).T
    Dtss = np.asarray(Dtss).T

    Xtr_ = np.c_[Xtr_, Dtrs]
    Xts_ = np.c_[Xts_, Dtss]

    names = ['cluster-%d' % i for i in range(k_all+1)] + \
        ['cluster-dist-%d' % i for i in range(k_all)] + \
        ['cluster-class-dist-%d' % i for i in range(1, 6)]
    return pd.DataFrame(Xtr_, columns=names), pd.DataFrame(Xts_, columns=names)
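The mean-kNN-distance outlier check used in run() works on any point set; a standalone sketch with an illustrative threshold and synthetic data:

import numpy as np
from sklearn.neighbors import NearestNeighbors

rng = np.random.RandomState(0)
pts = np.r_[rng.randn(200, 2) * 0.01, [[5.0, 5.0]]]  # one far-away point

nn = NearestNeighbors(n_neighbors=10).fit(pts)
dist, _ = nn.kneighbors(pts)
mean_dist = dist[:, 1:].mean(axis=1)  # drop the zero self-distance column
print('flagged outliers:', np.where(mean_dist >= 0.02)[0])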
Example #9
def fast_knn(X,
             n_clusters=5,
             n_neighbors=None,
             graph_mode='distance',
             cluster_mode='spectral',
             algorithm='brute',
             n_jobs=1,
             random_state=1234,
             force_sklearn=False):
    r"""
  Arguments:
    X : `ndarray` or tuple of (X, y)
    n_neighbors: int (default = 5)
      The top K closest datapoints you want the algorithm to return.
      Currently, this value must be < 1024.
    graph_mode : {'distance', 'connectivity'}, default='distance'
      This mode decides which values `kneighbors_graph` will return:
        - 'connectivity' : will return the connectivity matrix with ones and
          zeros (for 'SpectralClustering').
        - 'distance' : will return the distances between neighbors according
          to the given metric (for 'DBSCAN').
    cluster_mode: {'vote', 'spectral', 'isomap'}, default='vote'
        This mode decides how to generate cluster prediction from the
        neighbors graph:
        - 'dbscan' :
        - 'spectral' :
        - 'isomap' :
        - 'kmeans' :
    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional
        Algorithm used to compute the nearest neighbors:
        - 'ball_tree' will use :class:`BallTree`
        - 'kd_tree' will use :class:`KDTree`
        - 'brute' will use a brute-force search.
        - 'auto' will attempt to decide the most appropriate algorithm
          based on the values passed to :meth:`fit` method.
        Note: fitting on sparse input will override the setting of
        this parameter, using brute force.
  """
    kwargs = dict(locals())
    X = kwargs.pop('X')
    force_sklearn = kwargs.pop('force_sklearn')
    random_state = kwargs.pop('random_state')
    n_clusters = int(kwargs.pop('n_clusters'))
    if n_neighbors is None:
        kwargs['n_neighbors'] = n_clusters
        n_neighbors = n_clusters
    ## graph mode
    graph_mode = str(kwargs.pop('graph_mode')).strip().lower()
    assert graph_mode in ('distance', 'connectivity')
    ## cluster mode
    cluster_mode = str(kwargs.pop('cluster_mode')).strip().lower()
    ## fine-tuning the kwargs
    use_cuml = _check_cuml(force_sklearn)
    if use_cuml:
        from cuml.neighbors import NearestNeighbors
        kwargs['n_gpus'] = kwargs['n_jobs']
        kwargs.pop('n_jobs')
        kwargs.pop('algorithm')
    else:
        from sklearn.neighbors import NearestNeighbors
    ## fitting
    knn = NearestNeighbors(**kwargs)
    knn.fit(X)
    knn._fitid = id(X)
    ## Transform mode
    knn._random_state = random_state
    knn._n_clusters = n_clusters
    knn._graph_mode = graph_mode
    knn._cluster_mode = cluster_mode
    if use_cuml:
        knn.n_samples_fit_ = X.shape[0]
    knn.kneighbors_graph = types.MethodType(nn_kneighbors_graph, knn)
    knn.transform = types.MethodType(nn_transform, knn)
    knn.fit_transform = types.MethodType(nn_fit_transform, knn)
    knn.predict = types.MethodType(nn_predict, knn)
    return knn
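A hedged usage sketch for fast_knn: the attached methods come from the nn_* helpers defined alongside it (and from module-level `import types`), so their exact signatures are assumptions here.

import numpy as np

X = np.random.RandomState(1234).randn(500, 8).astype('float32')
knn = fast_knn(X, n_clusters=10, graph_mode='distance', cluster_mode='spectral')
labels = knn.predict(X)  # assumed: the attached nn_predict takes the data and returns cluster ids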
Example #10
# 5. Hyperparameter tuning
from sklearn.model_selection import GridSearchCV

params = {'n_neighbors': range(1, 10)}
mdl = KNeighborsClassifier()
grid = GridSearchCV(mdl, param_grid=params)
grid.fit(X, y)

print('Best parameters:', grid.best_params_)
print('Best score:', grid.best_score_)

mdl = grid.best_estimator_

# 6. Evaluate the model
y_pred = mdl.predict(X)
displayClassifierMetrics(y, y_pred, mdl.classes_)

y_prob = mdl.predict_proba(X)
displayROCurve(y, y_prob, mdl.classes_)

# Related class
# KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto',
#               leaf_size=30, p=2, metric='minkowski', metric_params=None, n_jobs=1, **kwargs)
# n_neighbors: int, optional (default = 5)
# weights: str or callable, optional (default = 'uniform')
#     Weight function used in prediction. Possible values:
#     - 'uniform' : uniform weights; all points in each neighborhood are weighted equally.
#     - 'distance' : weight points by the inverse of their distance, so closer
#       neighbors have a greater influence on the prediction.
#     - [callable] : a user-defined function that accepts an array of distances
#       and returns an array of the same shape containing the weights.
# algorithm: {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional (default = 'auto')
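A small sketch contrasting the two built-in weighting schemes described above; the dataset and split are only illustrative:

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

X, y = load_wine(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=0)

for w in ('uniform', 'distance'):
    clf = KNeighborsClassifier(n_neighbors=5, weights=w).fit(X_tr, y_tr)
    print(w, clf.score(X_te, y_te))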
Example #11

#accuracy

train_X = trainNorm[['zinventorygrowth', 'zpopulationgrowth']]
train_y = trainNorm['yoygtenp']
valid_X = validNorm[['zinventorygrowth', 'zpopulationgrowth']]
valid_y = validNorm['yoygtenp']

# Train a classifier for different values of k
results = []
for k in range(1, 12):
    knn = KNeighborsClassifier(n_neighbors=k).fit(train_X, train_y)
    results.append({
        'k': k,
        'accuracy': accuracy_score(valid_y, knn.predict(valid_X))
    })

# Convert results to a pandas data frame
results = pd.DataFrame(results)
print(results)

# Retrain with full dataset---KNN
retail_X = retailNorm[['zinventorygrowth', 'zpopulationgrowth']]
retail_y = retailNorm['yoygtenp']
knn = KNeighborsClassifier(n_neighbors=4).fit(retail_X, retail_y)
distances, indices = knn.kneighbors(newretailNorm)
print(knn.predict(newretailNorm))
print('Distances', distances)
print('Indices', indices)
print(retailNorm.iloc[indices[0], :])
Example #12
def knn_predictor(x_train, y_train, x_test, y_test):
	# KNeighborsClassifier (not the unsupervised NearestNeighbors)
	# provides fit(X, y), score() and predict()
	clf = KNeighborsClassifier(n_neighbors=5)
	clf.fit(x_train, y_train)
	accuracy = clf.score(x_test, y_test)
	f1 = precision_recall_fscore_support(y_test, clf.predict(x_test), average='weighted')[2]
	print(accuracy, f1)
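A quick usage sketch, assuming KNeighborsClassifier and precision_recall_fscore_support are imported as the function body requires; the dataset choice is illustrative:

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)
knn_predictor(x_train, y_train, x_test, y_test)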