Ejemplo n.º 1
0
def test_verbose():
    """Check what `fit` prints to stdout with ``verbose=1`` and by default.

    With ``verbose=1`` the captured output must contain every expected
    progress message; with the default verbosity nothing may be printed.
    """

    def fit_and_capture(**params):
        # Fit an estimator on iris while sys.stdout points at an in-memory
        # buffer, and return everything that was printed.  The estimator is
        # built inside the ``try`` so that sys.stdout is always restored,
        # even if construction or fitting raises.
        saved_stdout = sys.stdout
        sys.stdout = StringIO()
        try:
            estimator = LargeMarginNearestNeighbor(**params)
            estimator.fit(iris_data, iris_target)
        finally:
            captured = sys.stdout.getvalue()
            sys.stdout.close()
            sys.stdout = saved_stdout
        return captured

    # assert there is proper output when verbose = 1
    out = fit_and_capture(n_neighbors=3, verbose=1)
    expected_messages = (
        "[LargeMarginNearestNeighbor]",
        "Finding principal components",
        "Finding the target neighbors",
        "Computing static part of the gradient",
        "Training took",
    )
    for message in expected_messages:
        assert message in out

    # assert by default there is no output (verbose=0)
    out = fit_and_capture(n_neighbors=3)
    assert out == ''
Ejemplo n.º 2
0
def test_singleton_class():
    """Fit with a singleton class present, then with one non-singleton class.

    The first fit (one class reduced to a single sample) is expected to run;
    the second (only one class with more than one sample) must raise a
    ValueError with the message checked below.
    """
    data, labels = iris_data, iris_target

    # Case 1: relabel all samples of class 1 as class 2 except the first one,
    # so that class 1 becomes a singleton.
    lone_label = 1
    data_fit, _, labels_fit, _ = train_test_split(data, labels,
                                                  test_size=0.3,
                                                  stratify=labels)
    (lone_idx,) = np.where(labels_fit == lone_label)
    labels_fit[lone_idx] = 2
    labels_fit[lone_idx[0]] = lone_label

    estimator = LargeMarginNearestNeighbor(n_neighbors=3, max_iter=30)
    estimator.fit(data_fit, labels_fit)

    # Case 2: merge classes 1 and 2 into class 0, keeping a single sample of
    # each, so that class 0 is the only non-singleton class left.
    data_fit, _, labels_fit, _ = train_test_split(data, labels,
                                                  test_size=0.3,
                                                  stratify=labels)
    for label in (1, 2):
        (label_idx,) = np.where(labels_fit == label)
        labels_fit[label_idx] = 0
        labels_fit[label_idx[0]] = label

    estimator = LargeMarginNearestNeighbor(n_neighbors=3, max_iter=30)
    expected_msg = ('LargeMarginNearestNeighbor needs at least 2 '
                    'non-singleton classes, got 1.')
    assert_raise_message(ValueError, expected_msg, estimator.fit, data_fit,
                         labels_fit)
Ejemplo n.º 3
0
def test_n_components():
    """Check the validation of `n_components` against `init` and the data."""
    X = np.arange(12).reshape(4, 3)
    y = [1, 1, 2, 2]
    n_features = X.shape[1]

    init = np.random.rand(n_features - 1, 3)

    # n_components equals the data dimensionality but not init.shape[0]
    n_components = n_features
    mismatch_msg = ('The preferred embedding dimensionality '
                    '`n_components` ({}) does not match '
                    'the output dimensionality of the given '
                    'linear transformation `init` ({})!').format(
                        n_components, init.shape[0])
    estimator = LargeMarginNearestNeighbor(init=init,
                                           n_components=n_components)
    assert_raise_message(ValueError, mismatch_msg, estimator.fit, X, y)

    # n_components larger than the data dimensionality
    n_components = n_features + 2
    too_large_msg = ('The preferred embedding dimensionality '
                     '`n_components` ({}) cannot be greater '
                     'than the given data dimensionality ({})!').format(
                         n_components, n_features)
    estimator = LargeMarginNearestNeighbor(init=init,
                                           n_components=n_components)
    assert_raise_message(ValueError, too_large_msg, estimator.fit, X, y)

    # n_components smaller than the data dimensionality: fit must not raise
    estimator = LargeMarginNearestNeighbor(n_components=2, init='identity')
    estimator.fit(X, y)
Ejemplo n.º 4
0
def test_random_state():
    """Check that `random_state` controls the learned transformation.

    With more than `max_impostors` impostors (forcing sampling), fitting
    twice with the same random_state must give matching components, while a
    different random_state must give different components.
    """

    def learned_components(**kwargs):
        # Fit a fresh estimator on iris and return its components.
        estimator = LargeMarginNearestNeighbor(**kwargs)
        estimator.fit(iris_data, iris_target)
        return estimator.components_

    # init='identity' is used to ensure reproducibility
    settings = dict(n_neighbors=3,
                    max_impostors=5,
                    random_state=1,
                    max_iter=10,
                    init='identity')

    first = learned_components(**settings)
    second = learned_components(**settings)

    # This assertion fails on 32bit systems if init='pca'
    assert_allclose(first, second)

    settings['random_state'] = 2
    third = learned_components(**settings)

    assert not np.allclose(second, third)
Ejemplo n.º 5
0
def test_callback():
    """Check that a non-callable callback is rejected and a callable is used."""
    # A string is not callable, so fit must raise.
    bad_estimator = LargeMarginNearestNeighbor(n_neighbors=3,
                                               callback='my_cb')
    assert_raise_message(ValueError, '`callback` is not callable.',
                         bad_estimator.fit, iris_data, iris_target)

    max_iter = 10

    def report_remaining(transformation, n_iter):
        # Print how many iterations are left so the call is observable.
        print('{} iterations remaining...'.format(max_iter - n_iter))

    # Capture stdout during the fit to verify that the callback ran.
    real_stdout = sys.stdout
    sys.stdout = StringIO()

    estimator = LargeMarginNearestNeighbor(n_neighbors=3,
                                           callback=report_remaining,
                                           max_iter=max_iter,
                                           verbose=1)
    try:
        estimator.fit(iris_data, iris_target)
    finally:
        printed = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = real_stdout

    # The message printed for n_iter == 1 must appear in the output.
    expected_line = '{} iterations remaining...'.format(max_iter - 1)
    assert expected_line in printed
Ejemplo n.º 6
0
def test_max_impostors():
    """Fit with `max_impostors=1` for each `impostor_store` option."""
    for store in ('list', 'sparse'):
        estimator = LargeMarginNearestNeighbor(n_neighbors=3,
                                               max_impostors=1,
                                               impostor_store=store)
        estimator.fit(iris_data, iris_target)
Ejemplo n.º 7
0
def test_neighbors_params():
    """Check that `neighbors_params` changes the learned components."""
    from scipy.spatial.distance import hamming

    # Fit with a brute-force neighbor search using the hamming distance.
    custom_estimator = LargeMarginNearestNeighbor(
        n_neighbors=3,
        neighbors_params={'algorithm': 'brute', 'metric': hamming})
    custom_estimator.fit(iris_data, iris_target)
    hamming_components = custom_estimator.components_

    # Fit again without passing neighbors_params.
    default_estimator = LargeMarginNearestNeighbor(n_neighbors=3)
    default_estimator.fit(iris_data, iris_target)
    default_components = default_estimator.components_

    assert not np.allclose(hamming_components, default_components)
Ejemplo n.º 8
0
def test_pipeline_equivalency():
    """Compare `make_lmnn_pipeline` with a manual LMNN + kNN combination.

    Both must learn almost equal components and reach the same test score.
    """
    n_neighbors = 3
    X_train, X_test, y_train, y_test = train_test_split(iris_data,
                                                        iris_target,
                                                        test_size=0.3)

    # init='identity' is used to ensure reproducibility
    lmnn_params = {
        'n_neighbors': 3,
        'max_iter': 10,
        'init': 'identity',
        'random_state': 42,
    }

    standalone = LargeMarginNearestNeighbor(**lmnn_params)
    standalone.fit(X_train, y_train)

    pipeline = make_lmnn_pipeline(**lmnn_params)
    pipeline.fit(X_train, y_train)

    # The transformation learned inside the pipeline must match.
    assert_array_almost_equal(standalone.components_,
                              pipeline.named_steps.lmnn.components_)

    # Manual equivalent of the pipeline: transform, then classify with kNN.
    classifier = KNeighborsClassifier(n_neighbors=n_neighbors)
    classifier.fit(standalone.transform(X_train), y_train)
    manual_score = classifier.score(standalone.transform(X_test), y_test)

    pipeline_score = pipeline.score(X_test, y_test)

    assert manual_score == pipeline_score
Ejemplo n.º 9
0
def test_convergence_warning():
    """Check that a ConvergenceWarning is issued when `max_iter` is only 2."""
    estimator = LargeMarginNearestNeighbor(n_neighbors=3,
                                           max_iter=2,
                                           verbose=1)
    # The warning text is prefixed with the estimator's class name.
    expected_msg = '[{}] LMNN did not converge'.format(
        estimator.__class__.__name__)
    assert_warns_message(ConvergenceWarning, expected_msg, estimator.fit,
                         iris_data, iris_target)
Ejemplo n.º 10
0
def test_warm_start_effectiveness():
    """Compare the effect of one extra iteration with and without warm start.

    A 1-iteration second fit on the same data should give almost the same
    result with warm starting, and a more different one without it.
    """
    X, y = datasets.make_classification(n_samples=30,
                                        n_features=5,
                                        n_redundant=0,
                                        random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    n_iter = 10

    def change_after_one_more_iteration(warm_start):
        # Fit for n_iter iterations, refit with max_iter=1 and return the
        # summed absolute difference between the two sets of components.
        estimator = LargeMarginNearestNeighbor(n_neighbors=3,
                                               warm_start=warm_start,
                                               max_iter=n_iter,
                                               random_state=0)
        estimator.fit(X_train, y_train)
        before = estimator.components_
        estimator.max_iter = 1
        estimator.fit(X_train, y_train)
        after = estimator.components_
        return np.sum(np.abs(after - before))

    diff_warm = change_after_one_more_iteration(True)
    diff_cold = change_after_one_more_iteration(False)

    assert_true(
        diff_warm < 2.0,
        "Transformer changed significantly after one iteration even "
        "though it was warm-started.")

    assert_true(
        diff_cold > diff_warm,
        "Cold-started transformer changed less significantly than "
        "warm-started transformer after one iteration.")
Ejemplo n.º 11
0
def test_n_neighbors():
    """Check the warning raised when `n_neighbors` is too large for the data."""
    # Two classes with two samples each, while n_neighbors is also 2.
    samples = np.arange(12).reshape(4, 3)
    labels = [1, 1, 2, 2]

    expected_msg = ('`n_neighbors` (=2) is not less than the number of '
                    'samples in the smallest non-singleton class (=2). '
                    '`n_neighbors_` will be set to 1 for estimation.')
    estimator = LargeMarginNearestNeighbor(n_neighbors=2)
    assert_warns_message(UserWarning, expected_msg, estimator.fit, samples,
                         labels)
Ejemplo n.º 12
0
def test_impostor_store():
    """Check that both `impostor_store` options give almost equal components."""
    k = 3
    components_by_store = {}
    for store in ('list', 'sparse'):
        estimator = LargeMarginNearestNeighbor(n_neighbors=k,
                                               init='identity',
                                               impostor_store=store)
        estimator.fit(iris_data, iris_target)
        components_by_store[store] = estimator.components_

    assert_array_almost_equal(components_by_store['list'],
                              components_by_store['sparse'],
                              err_msg='Toggling `impostor_store` results in '
                              'a different solution.')
Ejemplo n.º 13
0
def test_store_opt_result():
    """Check the size of `opt_result_.x` when `store_opt_result=True`."""
    X, y = iris_data, iris_target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    estimator = LargeMarginNearestNeighbor(n_neighbors=3,
                                           max_iter=5,
                                           store_opt_result=True)
    estimator.fit(X_train, y_train)

    # The stored solution must hold one entry per element of a square
    # (n_features x n_features) matrix.
    n_features = X.shape[1]
    solution = estimator.opt_result_.x
    assert solution.size == n_features**2
Ejemplo n.º 14
0
def test_same_lmnn_parallel():
    """Check that refitting with `n_jobs=3` gives almost equal components."""
    X, y = datasets.make_classification(n_samples=30,
                                        n_features=5,
                                        n_redundant=0,
                                        random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    estimator = LargeMarginNearestNeighbor(n_neighbors=3)

    # First fit with the default n_jobs.
    estimator.fit(X_train, y_train)
    default_components = estimator.components_

    # Refit the same estimator with n_jobs=3.
    estimator.set_params(n_jobs=3)
    estimator.fit(X_train, y_train)
    parallel_components = estimator.components_

    assert_array_almost_equal(default_components, parallel_components)
Ejemplo n.º 15
0
def test_init_transformation():
    """Exercise the `init` options and the shape validation of a given init.

    'identity', 'pca' and a square user-supplied matrix must fit without
    error; mis-shaped matrices must raise a ValueError with the messages
    checked below.
    """
    X, y = datasets.make_classification(n_samples=30,
                                        n_features=5,
                                        n_redundant=0,
                                        random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    n_features = X.shape[1]

    # Identity initialization
    LargeMarginNearestNeighbor(n_neighbors=3,
                               init='identity').fit(X_train, y_train)

    # PCA initialization
    LargeMarginNearestNeighbor(n_neighbors=3,
                               init='pca').fit(X_train, y_train)

    # User-supplied square transformation
    init = np.random.rand(n_features, n_features)
    LargeMarginNearestNeighbor(n_neighbors=3,
                               init=init).fit(X_train, y_train)

    # init.shape[1] must match X.shape[1]
    init = np.random.rand(n_features, n_features + 1)
    estimator = LargeMarginNearestNeighbor(n_neighbors=3, init=init)
    input_dim_msg = ('The input dimensionality ({}) of the given '
                     'linear transformation `init` must match the '
                     'dimensionality of the given inputs `X` ({}).').format(
                         init.shape[1], n_features)
    assert_raise_message(ValueError, input_dim_msg, estimator.fit, X_train,
                         y_train)

    # init.shape[0] must be <= init.shape[1]
    init = np.random.rand(n_features + 1, n_features)
    estimator = LargeMarginNearestNeighbor(n_neighbors=3, init=init)
    output_dim_msg = ('The output dimensionality ({}) of the given '
                      'linear transformation `init` cannot be '
                      'greater than its input dimensionality ({}).').format(
                          init.shape[0], init.shape[1])
    assert_raise_message(ValueError, output_dim_msg, estimator.fit, X_train,
                         y_train)

    # init.shape[0] must match n_components
    init = np.random.rand(n_features, n_features)
    n_components = n_features - 2
    estimator = LargeMarginNearestNeighbor(n_neighbors=3,
                                           init=init,
                                           n_components=n_components)
    n_components_msg = ('The preferred embedding dimensionality '
                        '`n_components` ({}) does not match '
                        'the output dimensionality of the given '
                        'linear transformation `init` ({})!').format(
                            n_components, init.shape[0])
    assert_raise_message(ValueError, n_components_msg, estimator.fit,
                         X_train, y_train)
Ejemplo n.º 16
0
def test_neighbors_iris():
    """Sanity checks on the iris dataset with LMNN followed by kNN.

    For ``n_neighbors=1`` the kNN classifier fitted on the transformed
    training data must reproduce the training labels exactly.  For
    ``n_neighbors=9`` its accuracy on the transformed training data must
    exceed 0.95.
    """
    lmnn = LargeMarginNearestNeighbor(n_neighbors=1)
    lmnn.fit(iris_data, iris_target)
    knn = KNeighborsClassifier(n_neighbors=lmnn.n_neighbors_)
    LX = lmnn.transform(iris_data)
    knn.fit(LX, iris_target)
    y_pred = knn.predict(LX)

    assert_array_equal(y_pred, iris_target)

    lmnn.set_params(n_neighbors=9)
    lmnn.fit(iris_data, iris_target)
    knn = KNeighborsClassifier(n_neighbors=lmnn.n_neighbors_)
    # Re-embed the data with the refitted transformation.  Reusing the
    # embedding computed before the refit would leave the n_neighbors=9
    # model untested.
    LX = lmnn.transform(iris_data)
    knn.fit(LX, iris_target)

    assert knn.score(LX, iris_target) > 0.95
Ejemplo n.º 17
0
def test_warm_start_validation():
    """Check that a warm-started refit rejects inputs with fewer features."""
    # Initial fit on data with 5 features.
    X, y = datasets.make_classification(n_samples=30,
                                        n_features=5,
                                        n_classes=4,
                                        n_redundant=0,
                                        n_informative=5,
                                        random_state=0)

    estimator = LargeMarginNearestNeighbor(warm_start=True, max_iter=5)
    estimator.fit(X, y)

    # Refit on data with only 4 features: this must raise.
    X_less_features, y = datasets.make_classification(n_samples=30,
                                                      n_features=4,
                                                      n_classes=4,
                                                      n_redundant=0,
                                                      n_informative=4,
                                                      random_state=0)
    expected_msg = ('The new inputs dimensionality ({}) does not '
                    'match the input dimensionality of the '
                    'previously learned transformation ({}).').format(
                        X_less_features.shape[1],
                        estimator.components_.shape[1])
    assert_raise_message(ValueError, expected_msg, estimator.fit,
                         X_less_features, y)
Ejemplo n.º 18
0
def test_neighbors_digits():
    """Sanity check on the digits dataset with uint8 inputs.

    The 'brute' algorithm has been observed to fail if the input dtype is
    uint8 due to overflow in distance calculations, so the kNN score on
    uint8 inputs is compared with the score on the same data cast to float.
    """
    X = digits_data.astype('uint8')
    y = digits_target

    # First 80% of the samples for training, the rest for testing.
    n_samples, _ = X.shape
    boundary = int(n_samples * 0.8)
    train_idx = np.arange(0, boundary)
    test_idx = np.arange(boundary, n_samples)
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    k = 1
    estimator = LargeMarginNearestNeighbor(n_neighbors=k, max_iter=30)
    estimator.fit(X_train, y_train)
    classifier = KNeighborsClassifier(n_neighbors=k)

    # Score using the uint8 inputs.
    classifier.fit(estimator.transform(X_train), y_train)
    score_uint8 = classifier.score(estimator.transform(X_test), y_test)

    # Score using the same inputs cast to float.
    classifier.fit(estimator.transform(X_train.astype(float)), y_train)
    score_float = classifier.score(estimator.transform(X_test.astype(float)),
                                   y_test)

    assert score_uint8 == score_float
Ejemplo n.º 19
0
# Example: compare kNN accuracy on the Olivetti faces dataset before and
# after applying the transformation learned by LMNN.
data = fetch_olivetti_faces()
X, y = data.data, data.target

# Hold out 30% of the samples for testing, stratified by label.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    stratify=y)

knn = KNeighborsClassifier(n_neighbors=3)

# Train with no transformation (euclidean metric)
knn.fit(X_train, y_train)

# Test with euclidean metric
acc = knn.score(X_test, y_test)

# `score` returns a fraction; the '%' presentation type multiplies it by 100
# and appends the percent sign, so the printed value really is a percentage.
print('KNN accuracy on test set: {:.2%}'.format(acc))

# LMNN is no longer a classifier but a transformer
lmnn = LargeMarginNearestNeighbor(n_neighbors=3, verbose=1)
lmnn.fit(X_train, y_train)

# Train with transformation learned by LMNN
knn.fit(lmnn.transform(X_train), y_train)

# Test with transformation learned by LMNN
acc = knn.score(lmnn.transform(X_test), y_test)

print('LMNN accuracy on test set: {:.2%}'.format(acc))
Ejemplo n.º 20
0
# embedding
loreal_data = np.load('/export/home//loreal_135_classification/em_training.npz')
X_train, y_train = loreal_data['X'], loreal_data['y']

loreal_data = np.load('/export/home//loreal_135_classification/em_test.npz')
X_test, y_test = loreal_data['X'], loreal_data['y']
'''

# Compare kNN accuracy before and after applying the LMNN transformation.
# NOTE(review): X_train, y_train, X_test and y_test are not assigned in this
# section; they must already be defined when this code runs -- confirm where
# they come from.
knn = KNeighborsClassifier(n_neighbors=10)

# Baseline: train on the untransformed inputs (euclidean metric)
knn.fit(X_train, y_train)

# Baseline: evaluate on the untransformed test inputs
acc = knn.score(X_test, y_test)

# The score is printed as returned by `score`, without any rescaling.
print('KNN accuracy on test set: {}'.format(acc))


# LMNN is no longer a classifier but a transformer: it is fitted on the
# training data and then used via `transform` in front of kNN.
lmnn = LargeMarginNearestNeighbor(n_neighbors=10, verbose=1, max_iter=300)
lmnn.fit(X_train, y_train)

# Retrain the same kNN object on the LMNN-transformed training inputs
knn.fit(lmnn.transform(X_train), y_train)

# Evaluate on the LMNN-transformed test inputs
acc = knn.score(lmnn.transform(X_test), y_test)

print('LMNN accuracy on test set: {}'.format(acc))