Example #1
def test_pipeline_equivalency():
    X = iris_data
    y = iris_target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    # Use init='identity' to ensure reproducibility
    lmnn_params = dict(n_neighbors=3,
                       max_iter=10,
                       init='identity',
                       random_state=42)
    n_neighbors = lmnn_params['n_neighbors']

    lmnn = LargeMarginNearestNeighbor(**lmnn_params)
    lmnn.fit(X_train, y_train)

    lmnn_pipe = make_lmnn_pipeline(**lmnn_params)
    lmnn_pipe.fit(X_train, y_train)

    pipe_transformation = lmnn_pipe.named_steps.lmnn.components_
    assert_array_almost_equal(lmnn.components_, pipe_transformation)

    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(lmnn.transform(X_train), y_train)
    score = knn.score(lmnn.transform(X_test), y_test)

    score_pipe = lmnn_pipe.score(X_test, y_test)

    assert score == score_pipe
Example #2
def test_neighbors_digits():
    # Sanity check on the digits dataset
    # the 'brute' algorithm has been observed to fail if the input
    # dtype is uint8 due to overflow in distance calculations.

    X = digits_data.astype('uint8')
    y = digits_target
    n_samples, n_features = X.shape
    train_test_boundary = int(n_samples * 0.8)
    train = np.arange(0, train_test_boundary)
    test = np.arange(train_test_boundary, n_samples)
    X_train, y_train, X_test, y_test = X[train], y[train], X[test], y[test]

    k = 1
    lmnn = LargeMarginNearestNeighbor(n_neighbors=k, max_iter=30)
    lmnn.fit(X_train, y_train)
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(lmnn.transform(X_train), y_train)
    score_uint8 = knn.score(lmnn.transform(X_test), y_test)

    knn.fit(lmnn.transform(X_train.astype(float)), y_train)
    score_float = knn.score(lmnn.transform(X_test.astype(float)), y_test)

    assert score_uint8 == score_float
Example #3
def test_neighbors_iris():
    # Sanity checks on the iris dataset:
    # fit LMNN on the full data and check that a nearest-neighbor
    # classifier in the learned space reproduces the training labels.

    lmnn = LargeMarginNearestNeighbor(n_neighbors=1)
    lmnn.fit(iris_data, iris_target)
    knn = KNeighborsClassifier(n_neighbors=lmnn.n_neighbors_)
    LX = lmnn.transform(iris_data)
    knn.fit(LX, iris_target)
    y_pred = knn.predict(LX)

    assert_array_equal(y_pred, iris_target)

    lmnn.set_params(n_neighbors=9)
    lmnn.fit(iris_data, iris_target)
    knn = KNeighborsClassifier(n_neighbors=lmnn.n_neighbors_)
    LX = lmnn.transform(iris_data)  # recompute with the refitted transformation
    knn.fit(LX, iris_target)

    assert knn.score(LX, iris_target) > 0.95
Example #4
import pickle

import numpy as np

# The LMNN import is assumed to match Example #5 (pylmnn)
from pylmnn import LargeMarginNearestNeighbor as LMNN

csv = np.genfromtxt("data/numerical_train.csv", delimiter=',')
csv_test = np.genfromtxt("data/numerical_test.csv", delimiter=',')
n, d = csv.shape

X_train = csv[:, :d - 1]
y_train = csv[:, -1]

X_test = csv_test[:, :d - 1]
y_test = csv_test[:, -1]

k_train, n_components, max_iter = 7, d - 1, 180

lmnn = LMNN(n_neighbors=k_train, max_iter=max_iter, n_components=n_components)

print('learning the metric...')

# Train the metric learner
lmnn.fit(X_train, y_train)

X_train_transformed = lmnn.transform(X_train)
X_test_transformed = lmnn.transform(X_test)

with open("data/numerical_train_transformed.pkl", 'wb') as f:
    pickle.dump(X_train_transformed, f)
with open("data/numerical_train_labels.pkl", 'wb') as f:
    pickle.dump(y_train, f)
with open("data/numerical_test_transformed.pkl", 'wb') as f:
    pickle.dump(X_test_transformed, f)
with open("data/numerical_test_labels.pkl", 'wb') as f:
    pickle.dump(y_test, f)
with open("data/lmnn.pkl", 'wb') as f:
    pickle.dump(lmnn, f)

print('done!')
Example #5
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

from pylmnn import LargeMarginNearestNeighbor as LMNN


# Load a data set
X, y = load_iris(return_X_y=True)

# Split into training and test sets (train on 30%, test on 70%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.7, stratify=y, random_state=42)

# Set up the hyperparameters
k_train, k_test, n_components, max_iter = 3, 3, X.shape[1], 180

# Instantiate the metric learner
lmnn = LMNN(n_neighbors=k_train, max_iter=max_iter, n_components=n_components)

# Train the metric learner
lmnn.fit(X_train, y_train)

# Fit the nearest neighbors classifier
knn = KNeighborsClassifier(n_neighbors=k_test)
knn.fit(lmnn.transform(X_train), y_train)

# Compute the k-nearest neighbor test accuracy after applying the learned transformation
lmnn_acc = knn.score(lmnn.transform(X_test), y_test)
print('LMNN accuracy on test set of {} points: {:.4f}'.format(X_test.shape[0], lmnn_acc))
Example #6
# Assumed context (not shown in the snippet): x, x_train, x_test, y_train
# and y_test are defined earlier; imports assumed to match Example #5.
import time
from sklearn.neighbors import KNeighborsClassifier
from pylmnn import LargeMarginNearestNeighbor as LMNN

acc1 = []  # appended to in the loop below; missing from the original
acc2 = []
acc3 = []
acc4 = []
T = []
T1 = []
T2 = []
T3 = []
T4 = []

for k in [9, 11, 12, 13, 14, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 29]:
    print('Running K={} ... ... '.format(k))

    t0 = time.time()
    lmnn = LMNN(n_neighbors=k, max_iter=200, n_components=x.shape[1])
    lmnn.fit(x_train, y_train)
    x_train_ = lmnn.transform(x_train)
    x_test_ = lmnn.transform(x_test)
    t1 = time.time()
    T.append(t1 - t0)
    print('LMNN Cost:', t1 - t0)

    knn = KNeighborsClassifier(n_neighbors=k,
                               weights='distance',
                               metric='cosine',
                               algorithm='brute')
    knn.fit(x_train_, y_train)
    lmnn_acc = knn.score(x_test_, y_test)
    acc1.append(lmnn_acc)
    t2 = time.time()
    T1.append(t2 - t1)
    print('cosine Cost:', t2 - t1, '|accuracy:', lmnn_acc)