def runNCA(X_train, X_test, y_train, y_test):
    """Fit NCA on the training split, project both splits and save them.

    The projected arrays are written with ``np.save`` under the names
    ``X_train_NCA`` and ``X_test_NCA`` and are also returned.

    ``y_test`` is accepted for signature symmetry but is not used here.

    Returns:
        Tuple ``(projected_train, projected_test)``.
    """
    model = NCA(max_iter=100, verbose=True)
    model.fit(X_train, y_train)

    # Project both splits first, then persist them (same order as before).
    projected_train, projected_test = [
        model.transform(split) for split in (X_train, X_test)
    ]
    np.save('X_train_NCA', projected_train)
    np.save('X_test_NCA', projected_test)
    return projected_train, projected_test
Ejemplo n.º 2
0
  def test_iris(self):
    """Check NCA class separation on iris, with and without ``num_dims=2``."""
    points, labels = self.iris_points, self.iris_labels
    iterations = 100000 // points.shape[0]

    # Each case: (extra constructor kwargs, upper bound on class separation).
    cases = (
        ({}, 0.15),               # without dimension reduction
        ({'num_dims': 2}, 0.20),  # with dimension reduction
    )
    for extra_kwargs, bound in cases:
      model = NCA(max_iter=iterations, **extra_kwargs)
      model.fit(points, labels)
      separation = class_separation(model.transform(points), labels)
      self.assertLess(separation, bound)
Ejemplo n.º 3
0
    def test_iris(self):
        """Check NCA class separation on iris in full and in two dimensions.

        ``transform()`` is called without an argument; presumably this NCA
        version then projects the data it was fitted on (confirm against
        the library version in use).
        """
        max_iter = 100000 // self.iris_points.shape[0]

        # Without dimension reduction
        full_model = NCA(max_iter=max_iter)
        full_model.fit(self.iris_points, self.iris_labels)
        full_separation = class_separation(full_model.transform(),
                                           self.iris_labels)
        self.assertLess(full_separation, 0.15)

        # With dimension reduction
        reduced_model = NCA(max_iter=max_iter, num_dims=2, tol=1e-9)
        reduced_model.fit(self.iris_points, self.iris_labels)
        reduced_separation = class_separation(reduced_model.transform(),
                                              self.iris_labels)
        self.assertLess(reduced_separation, 0.20)
Ejemplo n.º 4
0
class NCA:
    """Wrapper around ``NCA_ml`` adding nearest-centroid probability scoring.

    The training data passed to :meth:`fit` is remembered so that
    :meth:`predict_proba` can compute class centroids in the learned space.
    """

    def __init__(self):
        self.metric_model = NCA_ml()
        # Raw (untransformed) training samples and labels, set by fit().
        self.X_tr = None
        self.y_train = None
        # Not read or written by any method in this class.
        self.X_te = None

    def fit(self, X_tr, y_train):
        """Fit the metric model on the given data and remember that data.

        Returns whatever ``self.metric_model.fit`` returns.
        """
        self.X_tr = X_tr
        self.y_train = y_train
        return self.metric_model.fit(X_tr, y_train)

    def transform(self, X):
        """Return ``X`` transformed by the fitted metric model."""
        return self.metric_model.transform(X)

    def predict_proba(self, X_te):
        """Score each row of ``X_te`` against the per-class centroids.

        Centroids are obtained by fitting ``NearestCentroid`` on the
        transformed training data; each test row is then passed with the
        centroids to ``sk_nearest_neighbour_proba``.

        Returns:
            Array of shape ``(X_te.shape[0], number_of_centroids)``.
        """
        n_test = X_te.shape[0]
        # Keep the projection in a local variable. The stored training data
        # must stay untransformed, otherwise every further call would apply
        # the learned transformation on top of the previous one.
        projected_train = self.transform(self.X_tr)
        clf = NearestCentroid()
        clf.fit(projected_train, self.y_train)
        centroids = clf.centroids_
        probabilities = np.zeros((n_test, centroids.shape[0]))
        # NOTE(review): X_te is used as given and is not transformed here;
        # presumably callers pass already-transformed samples -- confirm.
        for sample in range(n_test):
            probabilities[sample] = sk_nearest_neighbour_proba(
                centroids, X_te[sample, :])
        return probabilities
  def test_nca(self):
    """``fit`` followed by ``transform`` must agree with ``fit_transform``."""
    max_iter = 100000 // self.X.shape[0]

    # Two-step path: fit, then transform the same data.
    two_step = NCA(max_iter=max_iter)
    two_step.fit(self.X, self.y)
    from_fit_then_transform = two_step.transform(self.X)

    # One-step path on a fresh estimator with identical settings.
    one_step = NCA(max_iter=max_iter)
    from_fit_transform = one_step.fit_transform(self.X, self.y)

    assert_array_almost_equal(from_fit_then_transform, from_fit_transform)
Ejemplo n.º 6
0
    def NCA(self):
        """Replace ``self.features`` by their NCA projection and run k-NN.

        Inside this method the name ``NCA`` resolves to the module-level
        ``NCA`` estimator, not to this method.
        """
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print("Warning the features will be transformed")
        # Fit and transform through one estimator *instance*; calling
        # ``fit``/``transform`` on the class would pass ``self.features``
        # as ``self``.
        nca = NCA()
        # NOTE(review): ``targets`` is not defined in this method; presumably
        # a module-level name (or meant to be ``self.targets``) -- confirm.
        nca.fit(self.features, targets)

        self.features = nca.transform(self.features)
        self.prepare_for_testing()

        # Evaluate with nn
        self.nearest_neighbors("NCA + KNN")
Ejemplo n.º 7
0
  def test_nca(self):
    """Verify that ``fit_transform`` equals ``fit`` followed by ``transform``."""
    data, labels = self.X, self.y
    iteration_budget = 100000 // data.shape[0]

    separate = NCA(max_iter=iteration_budget)
    separate.fit(data, labels)
    separate_result = separate.transform(data)

    combined_result = NCA(max_iter=iteration_budget).fit_transform(data, labels)

    assert_array_almost_equal(separate_result, combined_result)
def nca_fit(X_train, Y_train, X_test, Y_test, color_map):
    """Fit NCA on the training split and transform both splits.

    When a split's *input* has exactly two columns, a scatter plot of its
    transformed points (coloured via ``color_map[labels]``) is saved to
    ``after_nca_transform_train.png`` / ``after_nca_transform_test.png``.

    Returns:
        Tuple ``(X_train_transformed, X_test_transformed)``.
    """
    model = NCA(init='pca', max_iter=5000)
    model.fit(X_train, Y_train)

    splits = (
        (X_train, Y_train, "after_nca_transform_train.png"),
        (X_test, Y_test, "after_nca_transform_test.png"),
    )
    transformed = []
    for features, labels, figure_path in splits:
        embedded = model.transform(features)
        transformed.append(embedded)
        # Only two-column inputs are plotted.
        if features.shape[1] != 2:
            continue
        plt.figure()
        plt.scatter(embedded[:, 0],
                    embedded[:, 1],
                    c=color_map[labels],
                    s=2)
        plt.savefig(figure_path, dpi=300)
    return (transformed[0], transformed[1])
Ejemplo n.º 9
0
  def test_simple_example(self):
    """Check NCA on a four-point toy problem.

    The input places points with opposite labels next to each other. After
    the learned transformation, each point's nearest neighbour should be
    the point sharing its label.

    """
    points = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    labels = np.array([1, 0, 1, 0])

    model = NCA(n_components=2,)
    model.fit(points, labels)
    embedded = model.transform(points)

    # Column 1 of the argsort is the closest *other* point (column 0 is self).
    nearest_other = pairwise_distances(embedded).argsort()[:, 1]
    np.testing.assert_equal(nearest_other, np.array([2, 3, 0, 1]))
Ejemplo n.º 10
0
  def test_simple_example(self):
    """Four-point sanity check for NCA.

    Points with opposite labels start out adjacent in the input space. Once
    transformed, the nearest neighbour of every point is expected to be the
    other point carrying the same label.

    """
    samples = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    targets = np.array([1, 0, 1, 0])
    expected_neighbours = np.array([2, 3, 0, 1])

    learner = NCA(num_dims=2,)
    learner.fit(samples, targets)
    projected = learner.transform(samples)

    distance_order = pairwise_distances(projected).argsort()
    # Index 0 of each row is the point itself; index 1 is its nearest neighbour.
    np.testing.assert_equal(distance_order[:, 1], expected_neighbours)
Ejemplo n.º 11
0
    def test_iris(self):
        """Compare NCA on iris with a reference matrix and a separation bound."""
        iterations = 100000 // self.iris_points.shape[0]

        # Without dimension reduction
        full_model = NCA(max_iter=iterations, learning_rate=0.01)
        full_model.fit(self.iris_points, self.iris_labels)
        # Reference values copied from the Iris example at
        # https://github.com/vomjom/nca/blob/master/README.mkd
        reference = [
            [-0.09935, -0.2215, 0.3383, 0.443],
            [+0.2532, 0.5835, -0.8461, -0.8915],
            [-0.729, -0.6386, 1.767, 1.832],
            [-0.9405, -0.8461, 2.281, 2.794],
        ]
        learned = full_model.transformer()
        assert_array_almost_equal(reference, learned, decimal=3)

        # With dimension reduction
        reduced_model = NCA(max_iter=iterations, learning_rate=0.01,
                            num_dims=2)
        reduced_model.fit(self.iris_points, self.iris_labels)
        separation = class_separation(reduced_model.transform(),
                                      self.iris_labels)
        self.assertLess(separation, 0.15)
Ejemplo n.º 12
0
  def test_iris(self):
    """Check the learned iris transformer and the 2-D class separation."""
    points, labels = self.iris_points, self.iris_labels
    # Settings shared by both estimators below.
    shared = dict(max_iter=(100000 // points.shape[0]), learning_rate=0.01)

    # Without dimension reduction
    full = NCA(**shared)
    full.fit(points, labels)
    # Expected matrix taken from the Iris example at
    # https://github.com/vomjom/nca/blob/master/README.mkd
    expected_rows = [
        [-0.09935, -0.2215, 0.3383, 0.443],
        [+0.2532, 0.5835, -0.8461, -0.8915],
        [-0.729, -0.6386, 1.767, 1.832],
        [-0.9405, -0.8461, 2.281, 2.794],
    ]
    assert_array_almost_equal(expected_rows, full.transformer(), decimal=3)

    # With dimension reduction
    reduced = NCA(num_dims=2, **shared)
    reduced.fit(points, labels)
    self.assertLess(class_separation(reduced.transform(), labels), 0.15)
Ejemplo n.º 13
0

# Script entry point: fit NCA on the training split and save the transformed
# features together with the labels of both splits as .npy files.
if __name__ == '__main__':
    parser = argparse.ArgumentParser("NCA")
    # --data-root is not read in this block; presumably load_split(args)
    # consumes it -- confirm.
    parser.add_argument('--data-root', default='./data/raw_split')
    parser.add_argument('--n-components', type=int, default=2)
    parser.add_argument('--max-iter', type=int, default=100)
    args = parser.parse_args()

    # Output folder is keyed by the two hyper-parameters,
    # e.g. ./data/NCA/2_100 for the defaults.
    name = f"{args.n_components}_{args.max_iter}"
    data_save_folder = f"./data/NCA/{name}"
    # NOTE(review): if this is os.makedirs without exist_ok=True, re-running
    # with the same arguments raises because the folder exists -- confirm.
    makedirs(data_save_folder)

    X_train, X_test, y_train, y_test = load_split(args)
    print(X_train.shape)

    # Start timestamp; never read again in the visible code.
    t = time.time()

    nca = NCA(n_components=args.n_components,
              max_iter=args.max_iter,
              verbose=1)
    nca.fit(X_train, y_train)
    print(" # NCA fit done.")

    # Features are saved after the NCA transform; labels are saved unchanged.
    np.save(osp.join(data_save_folder, "feature_train.npy"),
            nca.transform(X_train))
    np.save(osp.join(data_save_folder, "label_train.npy"), y_train)
    np.save(osp.join(data_save_folder, "feature_test.npy"),
            nca.transform(X_test))
    np.save(osp.join(data_save_folder, "label_test.npy"), y_test)
Ejemplo n.º 14
0
    return {
        'cv_score': top_score,
        'accuracy': accuracy,
        'roc': roc,
        'majority_accuracy': majority_accuracy,
        'majority_roc': majority_roc
    }


for d in range(len(dataset_collection)):

    print("Metric learning")

    nca1 = NCA(max_iter=1000, learning_rate=0.01)
    nca1.fit(x_train, y_train)
    t_x_train = nca1.transform()
    nca2 = NCA(max_iter=1000, learning_rate=0.01)
    nca2.fit(x_test, np.array(y_test))
    t_x_test = nca2.transform()
    dat = [t_x_train, t_x_test]
    nn_metric = KNeighborsClassifier()
    nn_metric_params = {
        "n_neighbors": range(5, max(6,
                                    len(data) / 10)),
        'leaf_size': range(30, 100)
    }

    classifier_stats['nn_metric'] = test_classifier(nn_metric,
                                                    nn_metric_params, dat)

    out = pickle.dump(
Ejemplo n.º 15
0
def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file."""
    # NOTE: unpickling can execute arbitrary code; only use this on trusted,
    # locally generated files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _print_mean_knn_accuracy(knn, k_values, trial, X_fit, y_fit, X_eval,
                             y_eval):
    """Print, for each k, the mean of ``trial`` ``knn.evaluate`` results."""
    for k in k_values:
        knn.k = k
        accuracies = [
            knn.evaluate(X_fit, y_fit, X_eval, y_eval) for _ in range(trial)
        ]
        print(np.mean(np.array(accuracies)))


def nca_mnist_experiment(trial, train_percentage=0.1, test_percentage=0.1):
    """Compare k-NN on MNIST encodings before and after an NCA transform.

    Loads pickled encodings and labels from ``./data/MNIST_encoding``, draws
    a random subset of each split, and prints the mean k-NN result for
    ``k`` in ``[1, 3, 5, 7]``: first on the raw encodings, then on the
    NCA-transformed ones.

    Args:
        trial: Number of ``knn.evaluate`` calls averaged per value of ``k``.
        train_percentage: Fraction of the training encodings to sample.
        test_percentage: Fraction of the test encodings to sample.
    """
    encoding_train_imgs_path = './data/MNIST_encoding/tf_train.encoding'
    encoding_test_imgs_path = './data/MNIST_encoding/tf_test.encoding'

    train_labels_path = './data/MNIST_encoding/tf_train.labels'
    test_labels_path = './data/MNIST_encoding/tf_test.labels'

    encoding_train = _load_pickle(encoding_train_imgs_path)
    encoding_test = _load_pickle(encoding_test_imgs_path)

    print(encoding_train.shape)

    train_labels = _load_pickle(train_labels_path)
    test_labels = _load_pickle(test_labels_path)

    print(train_labels.shape)

    # Random subset of the training split. ``float`` is what the removed
    # ``np.float`` alias referred to, so the resulting dtype is unchanged.
    m = len(encoding_train)
    train_m = int(m * train_percentage)
    sel = random.sample(range(m), train_m)
    X = encoding_train.astype(float)[sel]
    y = train_labels[sel]

    print(X.shape)
    print(y.shape)

    # Random subset of the test split.
    m = len(encoding_test)
    test_m = int(m * test_percentage)
    sel = random.sample(range(m), test_m)

    X_test = encoding_test.astype(float)[sel]
    y_test = test_labels[sel]

    print(X_test.shape)
    print(y_test.shape)

    knn = kNN()
    k_values = [1, 3, 5, 7]

    # Baseline: k-NN on the raw encodings.
    _print_mean_knn_accuracy(knn, k_values, trial, X, y, X_test, y_test)

    nca = NCA(max_iter=100, learning_rate=0.01)
    nca.fit(X, y)
    # ``transform()`` without an argument presumably returns the transformed
    # training data in this NCA version -- confirm against the library.
    x_train = nca.transform()
    x_test = nca.transform(X_test)

    # Same evaluation in the NCA-transformed space.
    _print_mean_knn_accuracy(knn, k_values, trial, x_train, y, x_test, y_test)
            learn_rate=1e-6,
            verbose=True)
# Time the LMNN fit on the PCA projection of the training samples.
start_time = time.time()
lmnn.fit(pca.train_sample_projection, original_train_labels)
end_time = time.time()
print("Learning time: %s" % (end_time - start_time))
# Apply the learned LMNN transform to the PCA-projected query/gallery features.
transformed_query_features = lmnn.transform(pca_query_features)
transformed_gallery_features = lmnn.transform(pca_gallery_features)
compute_k_mean(num_of_clusters, transformed_query_features,
               transformed_gallery_features, gallery_labels)

# Compute NCA (Neighbourhood Components Analysis) Learning
# This variant is fitted on the original (non-PCA) features; unlike the two
# other fits in this section it is not timed.
print("\n-----NCA-----")
nca = NCA(max_iter=20, verbose=True)
nca.fit(original_train_features, original_train_labels)
transformed_query_features = nca.transform(query_features)
transformed_gallery_features = nca.transform(gallery_features)
compute_k_mean(num_of_clusters, transformed_query_features,
               transformed_gallery_features, gallery_labels)

# Compute PCA_NCA Learning
# Same NCA settings as above, but fitted on and applied to PCA projections.
print("\n-----PCA_NCA-----")
nca = NCA(max_iter=20, verbose=True)
start_time = time.time()
nca.fit(pca.train_sample_projection, original_train_labels)
end_time = time.time()
print("Learning time: %s" % (end_time - start_time))
transformed_query_features = nca.transform(pca_query_features)
transformed_gallery_features = nca.transform(pca_gallery_features)
compute_k_mean(num_of_clusters, transformed_query_features,
               transformed_gallery_features, gallery_labels)
Ejemplo n.º 17
0
 def nca(data, label, dim):
     """Fit NCA with ``num_dims=dim`` on ``data``/``label``; return ``data`` transformed."""
     # A distinct local name avoids shadowing this function inside its body.
     model = NCA(num_dims=dim, max_iter=1000, learning_rate=0.01)
     model.fit(data, label)
     return model.transform(data)
    y_min, y_max = X[:, 1].min(), X[:, 1].max()

    x_center = (x_min + x_max) / 2
    y_center = (y_min + y_max) / 2

    max_diff = max(x_max - x_min, y_max - y_min)
    margin = max_diff / 20

    y_lims = (y_center - max_diff / 2 - margin,
              y_center + max_diff / 2 + margin)
    x_lims = (x_center - max_diff / 2 - margin,
              x_center + max_diff / 2 + margin)

    plt.figure()

    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.xlim(*x_lims)
    plt.ylim(*y_lims)
    plt.axis('equal')
    plt.savefig(name)


# Plot the data as given, before any metric is learned.
plot_points(X, y, 'supervised_without_metric')

# Fit NCA with its default settings on the labelled data.
nca = NCA()
nca.fit(X, y)

# Transform the same points with the fitted model.
X_e = nca.transform(X)

# Plot the transformed points for comparison with the first figure.
plot_points(X_e, y, 'supervised_with_metric')