import numpy as np
from metric_learn import NCA  # assumed source of NCA, consistent with the kwargs below


def runNCA(X_train, X_test, y_train, y_test):
    # Learn an NCA transformation on the training split, project both
    # splits, and cache the projections (np.save appends the .npy suffix).
    transformer = NCA(max_iter=100, verbose=True)
    transformer.fit(X_train, y_train)
    X_train_proj = transformer.transform(X_train)
    X_test_proj = transformer.transform(X_test)
    np.save('X_train_NCA', X_train_proj)
    np.save('X_test_NCA', X_test_proj)
    return X_train_proj, X_test_proj
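# A minimal usage sketch for runNCA above, assuming scikit-learn is
# available; the iris data and the 70/30 split are illustrative, not
# from the source.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=0)
X_tr_nca, X_te_nca = runNCA(X_tr, X_te, y_tr, y_te)
print(X_tr_nca.shape, X_te_nca.shape)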
def test_iris(self):
    n = self.iris_points.shape[0]

    # Without dimension reduction
    nca = NCA(max_iter=(100000 // n))
    nca.fit(self.iris_points, self.iris_labels)
    csep = class_separation(nca.transform(self.iris_points), self.iris_labels)
    self.assertLess(csep, 0.15)

    # With dimension reduction
    nca = NCA(max_iter=(100000 // n), num_dims=2)
    nca.fit(self.iris_points, self.iris_labels)
    csep = class_separation(nca.transform(self.iris_points), self.iris_labels)
    self.assertLess(csep, 0.20)
def test_iris(self):
    n = self.iris_points.shape[0]

    # Without dimension reduction
    nca = NCA(max_iter=(100000 // n))
    nca.fit(self.iris_points, self.iris_labels)
    # transform() with no argument projects the training data
    # (older metric-learn API).
    csep = class_separation(nca.transform(), self.iris_labels)
    self.assertLess(csep, 0.15)

    # With dimension reduction
    nca = NCA(max_iter=(100000 // n), num_dims=2, tol=1e-9)
    nca.fit(self.iris_points, self.iris_labels)
    csep = class_separation(nca.transform(), self.iris_labels)
    self.assertLess(csep, 0.20)
import numpy as np
from sklearn.neighbors import NearestCentroid
from metric_learn import NCA as NCA_ml  # assumed source of the wrapped model


class NCA:
    def __init__(self):
        self.metric_model = NCA_ml()
        self.X_tr = None
        self.y_train = None
        self.X_te = None

    def fit(self, X_tr, y_train):
        """Fits the model to the prescribed data."""
        self.X_tr = X_tr
        self.y_train = y_train
        return self.metric_model.fit(X_tr, y_train)

    def transform(self, X):
        """Transforms the test data according to the model."""
        return self.metric_model.transform(X)

    def predict_proba(self, X_te):
        """Predicts the probabilities of each of the test samples."""
        test_samples = X_te.shape[0]
        # Project both splits into the learned space. The original
        # transformed only the training data (comparing centroids and test
        # points in different spaces) and overwrote self.X_tr, so repeated
        # calls re-projected already projected data.
        X_tr_proj = self.transform(self.X_tr)
        X_te_proj = self.transform(X_te)
        clf = NearestCentroid()
        clf.fit(X_tr_proj, self.y_train)
        centroids = clf.centroids_
        probabilities = np.zeros((test_samples, centroids.shape[0]))
        for sample in range(test_samples):  # xrange is Python 2 only
            probabilities[sample] = sk_nearest_neighbour_proba(
                centroids, X_te_proj[sample, :])
        return probabilities
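# sk_nearest_neighbour_proba is used above but not defined in this snippet.
# A hypothetical stand-in (an assumption, not the original helper) would turn
# distances from a sample to each class centroid into a probability
# distribution via a softmax over negative squared distances:
def sk_nearest_neighbour_proba(centroids, sample):
    # Negative squared distances act as logits: a closer centroid gets a
    # higher probability.
    d = -((centroids - sample) ** 2).sum(axis=1)
    e = np.exp(d - d.max())  # subtract the max for numerical stability
    return e / e.sum()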
def test_nca(self):
    n = self.X.shape[0]
    nca = NCA(max_iter=(100000 // n))
    nca.fit(self.X, self.y)
    res_1 = nca.transform(self.X)

    nca = NCA(max_iter=(100000 // n))
    res_2 = nca.fit_transform(self.X, self.y)

    assert_array_almost_equal(res_1, res_2)
def NCA(self):
    print("Warning: the features will be transformed")
    # The original bound the model to `lmnn`, then called fit/transform on
    # the NCA class itself and used an undefined `targets`; assuming the
    # labels live on self.targets.
    nca = NCA()
    nca.fit(self.features, self.targets)
    self.features = nca.transform(self.features)
    self.prepare_for_testing()
    # Evaluate with nn
    self.nearest_neighbors("NCA + KNN")
def nca_fit(X_train, Y_train, X_test, Y_test, color_map):
    nca = NCA(init='pca', max_iter=5000)
    nca.fit(X_train, Y_train)

    X_train_transformed = nca.transform(X_train)
    # Only scatter-plot 2-D data (NCA keeps the input dimension here, so
    # checking the input shape is equivalent to checking the output).
    if X_train.shape[1] == 2:
        plt.figure()
        plt.scatter(X_train_transformed[:, 0], X_train_transformed[:, 1],
                    c=color_map[Y_train], s=2)
        plt.savefig("after_nca_transform_train.png", dpi=300)

    X_test_transformed = nca.transform(X_test)
    if X_test.shape[1] == 2:
        plt.figure()
        plt.scatter(X_test_transformed[:, 0], X_test_transformed[:, 1],
                    c=color_map[Y_test], s=2)
        plt.savefig("after_nca_transform_test.png", dpi=300)

    return (X_train_transformed, X_test_transformed)
def test_simple_example(self):
    """Test on a simple example.

    Puts four points in the input space where points with opposite labels
    are next to each other. After the transform, points with the same label
    should be next to each other.
    """
    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    y = np.array([1, 0, 1, 0])
    nca = NCA(n_components=2)
    nca.fit(X, y)
    X_transformed = nca.transform(X)
    # Column 1 of the argsorted distance matrix is each point's nearest
    # neighbour other than itself (column 0 is the point itself).
    np.testing.assert_equal(pairwise_distances(X_transformed).argsort()[:, 1],
                            np.array([2, 3, 0, 1]))
def test_simple_example(self):
    """Test on a simple example.

    Puts four points in the input space where points with opposite labels
    are next to each other. After the transform, points with the same label
    should be next to each other.
    """
    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    y = np.array([1, 0, 1, 0])
    nca = NCA(num_dims=2)  # older metric-learn name for n_components
    nca.fit(X, y)
    X_transformed = nca.transform(X)
    np.testing.assert_equal(pairwise_distances(X_transformed).argsort()[:, 1],
                            np.array([2, 3, 0, 1]))
def test_iris(self):
    n = self.iris_points.shape[0]

    # Without dimension reduction
    nca = NCA(max_iter=(100000 // n), learning_rate=0.01)
    nca.fit(self.iris_points, self.iris_labels)
    # Result copied from the Iris example at
    # https://github.com/vomjom/nca/blob/master/README.mkd
    expected = [[-0.09935, -0.2215, 0.3383, 0.443],
                [0.2532, 0.5835, -0.8461, -0.8915],
                [-0.729, -0.6386, 1.767, 1.832],
                [-0.9405, -0.8461, 2.281, 2.794]]
    # transformer() returns the learned linear map (older metric-learn API).
    assert_array_almost_equal(expected, nca.transformer(), decimal=3)

    # With dimension reduction
    nca = NCA(max_iter=(100000 // n), learning_rate=0.01, num_dims=2)
    nca.fit(self.iris_points, self.iris_labels)
    csep = class_separation(nca.transform(), self.iris_labels)
    self.assertLess(csep, 0.15)
import argparse
import time
import os.path as osp

import numpy as np
from metric_learn import NCA  # assumed import; matches the kwargs below

if __name__ == '__main__':
    parser = argparse.ArgumentParser("NCA")
    parser.add_argument('--data-root', default='./data/raw_split')
    parser.add_argument('--n-components', type=int, default=2)
    parser.add_argument('--max-iter', type=int, default=100)
    args = parser.parse_args()

    name = f"{args.n_components}_{args.max_iter}"
    data_save_folder = f"./data/NCA/{name}"
    makedirs(data_save_folder)  # project-local helper, defined elsewhere

    # load_split is a project-local loader, defined elsewhere.
    X_train, X_test, y_train, y_test = load_split(args)
    print(X_train.shape)

    t = time.time()
    nca = NCA(n_components=args.n_components, max_iter=args.max_iter,
              verbose=1)
    nca.fit(X_train, y_train)
    print(" # NCA fit done.")

    np.save(osp.join(data_save_folder, "feature_train.npy"),
            nca.transform(X_train))
    np.save(osp.join(data_save_folder, "label_train.npy"), y_train)
    np.save(osp.join(data_save_folder, "feature_test.npy"),
            nca.transform(X_test))
    np.save(osp.join(data_save_folder, "label_test.npy"), y_test)
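# Assuming the script above is saved as run_nca.py (the filename is not given
# in the source), it would be invoked along these lines:
#
#   python run_nca.py --data-root ./data/raw_split --n-components 2 --max-iter 100
#
# which writes the projected features and labels under ./data/NCA/2_100/.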
    return {
        'cv_score': top_score,
        'accuracy': accuracy,
        'roc': roc,
        'majority_accuracy': majority_accuracy,
        'majority_roc': majority_roc
    }


for d in range(len(dataset_collection)):
    print("Metric learning")
    nca1 = NCA(max_iter=1000, learning_rate=0.01)
    nca1.fit(x_train, y_train)
    t_x_train = nca1.transform()  # no-arg transform projects the training data
    # The original fit a second NCA on the test split (using its labels),
    # which projects train and test into unrelated spaces; project the test
    # data with the model learned on the training split instead.
    t_x_test = nca1.transform(x_test)
    dat = [t_x_train, t_x_test]

    nn_metric = KNeighborsClassifier()
    nn_metric_params = {
        # floor division keeps the range bound an int under Python 3
        "n_neighbors": range(5, max(6, len(data) // 10)),
        'leaf_size': range(30, 100)
    }
    classifier_stats['nn_metric'] = test_classifier(nn_metric,
                                                    nn_metric_params, dat)

out = pickle.dump(
def nca_mnist_experiment(trial, train_percentage=0.1, test_percentage=0.1):
    encoding_train_imgs_path = './data/MNIST_encoding/tf_train.encoding'
    encoding_test_imgs_path = './data/MNIST_encoding/tf_test.encoding'
    train_labels_path = './data/MNIST_encoding/tf_train.labels'
    test_labels_path = './data/MNIST_encoding/tf_test.labels'

    encoding_train = pickle.load(open(encoding_train_imgs_path, 'rb'))
    encoding_test = pickle.load(open(encoding_test_imgs_path, 'rb'))
    print(encoding_train.shape)

    train_labels = pickle.load(open(train_labels_path, 'rb'))
    test_labels = pickle.load(open(test_labels_path, 'rb'))
    print(train_labels.shape)

    # Subsample the training encodings.
    m = len(encoding_train)
    train_m = int(m * train_percentage)
    sel = random.sample(range(m), train_m)
    X = encoding_train.astype(np.float64)[sel]  # np.float is removed in NumPy 1.24+
    y = train_labels[sel]
    print(X.shape)
    print(y.shape)

    # Subsample the test encodings.
    m = len(encoding_test)
    test_m = int(m * test_percentage)
    sel = random.sample(range(m), test_m)
    X_test = encoding_test.astype(np.float64)[sel]
    y_test = test_labels[sel]
    print(X_test.shape)
    print(y_test.shape)

    # Baseline: k-NN accuracy in the raw encoding space.
    knn = kNN()  # project-local k-NN helper
    k_values = [1, 3, 5, 7]
    for k in k_values:
        knn.k = k
        acc_list = []
        for _ in range(trial):
            acc = knn.evaluate(X, y, X_test, y_test)
            acc_list.append(acc)
        print(np.mean(np.array(acc_list)))

    # k-NN accuracy after the NCA projection.
    nca = NCA(max_iter=100, learning_rate=0.01)
    nca.fit(X, y)
    x_train = nca.transform()  # no-arg transform projects the training data
    x_test = nca.transform(X_test)
    for k in k_values:
        knn.k = k
        acc_list = []
        for _ in range(trial):
            acc = knn.evaluate(x_train, y, x_test, y_test)
            acc_list.append(acc)
        print(np.mean(np.array(acc_list)))
# The LMNN constructor is cut off in the source; only its trailing arguments
# (learn_rate=1e-6, verbose=True) survive, so the leading arguments below are
# lost.
lmnn = LMNN(learn_rate=1e-6, verbose=True)
start_time = time.time()
lmnn.fit(pca.train_sample_projection, original_train_labels)
end_time = time.time()
print("Learning time: %s" % (end_time - start_time))
transformed_query_features = lmnn.transform(pca_query_features)
transformed_gallery_features = lmnn.transform(pca_gallery_features)
compute_k_mean(num_of_clusters, transformed_query_features,
               transformed_gallery_features, gallery_labels)

# Compute NCA (Neighbourhood Components Analysis) learning
print("\n-----NCA-----")
nca = NCA(max_iter=20, verbose=True)
nca.fit(original_train_features, original_train_labels)
transformed_query_features = nca.transform(query_features)
transformed_gallery_features = nca.transform(gallery_features)
compute_k_mean(num_of_clusters, transformed_query_features,
               transformed_gallery_features, gallery_labels)

# Compute PCA_NCA learning
print("\n-----PCA_NCA-----")
nca = NCA(max_iter=20, verbose=True)
start_time = time.time()
nca.fit(pca.train_sample_projection, original_train_labels)
end_time = time.time()
print("Learning time: %s" % (end_time - start_time))
transformed_query_features = nca.transform(pca_query_features)
transformed_gallery_features = nca.transform(pca_gallery_features)
compute_k_mean(num_of_clusters, transformed_query_features,
               transformed_gallery_features, gallery_labels)
def nca(data, label, dim):
    # Learn an NCA projection to `dim` dimensions and return the projected
    # data (older metric-learn API: num_dims, learning_rate). The model is
    # named `model` here to avoid shadowing the enclosing function.
    model = NCA(num_dims=dim, max_iter=1000, learning_rate=0.01)
    model.fit(data, label)
    return model.transform(data)
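# An illustrative call of the helper above, assuming an older metric-learn
# where num_dims/learning_rate are valid NCA arguments; the wine data is
# just an example, not from the source.
from sklearn.datasets import load_wine

X, y = load_wine(return_X_y=True)
X2 = nca(X, y, 2)  # project the 13 wine features down to 2-D with NCA
print(X2.shape)    # -> (178, 2)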
def plot_points(X, y, name):
    # The function header and the x-axis extrema are missing from the source;
    # both are reconstructed here by symmetry with the y-axis lines and the
    # calls below.
    x_min, x_max = X[:, 0].min(), X[:, 0].max()
    y_min, y_max = X[:, 1].min(), X[:, 1].max()
    x_center = (x_min + x_max) / 2
    y_center = (y_min + y_max) / 2
    max_diff = max(x_max - x_min, y_max - y_min)
    margin = max_diff / 20
    y_lims = (y_center - max_diff / 2 - margin,
              y_center + max_diff / 2 + margin)
    x_lims = (x_center - max_diff / 2 - margin,
              x_center + max_diff / 2 + margin)
    plt.figure()
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.xlim(*x_lims)
    plt.ylim(*y_lims)
    plt.axis('equal')
    plt.savefig(name)


plot_points(X, y, 'supervised_without_metric')

nca = NCA()
nca.fit(X, y)
X_e = nca.transform(X)
plot_points(X_e, y, 'supervised_with_metric')