def reduce_dim(args, data_loader):
    save_to = '/'.join(args.data_path.split('/')[:-1]) + '/landslide_reduced.npy'
    if args.reduce_dim == 'NCA':
        rdim = metric_learn.NCA(max_iter=10000000, num_dims=2, verbose=True,
                                tol=0.0001)
    else:
        raise ValueError('unknown reduce_dim method: %s' % args.reduce_dim)
    print('(%s) ---- preparing to join the data ----' % ctime())
    X, y = join_data(args, data_loader)
    print('(%s) ---- data is joined ----' % ctime())
    rdim.fit(X, y)
    print('(%s) ---- model is fit and dimension is successfully reduced ----'
          % ctime())
    X_new = rdim.transform(X)
    # append the labels as the last column of the reduced data matrix
    n_datamat = np.concatenate((X_new, y), 1)
    np.save(save_to, n_datamat)
    print('(%s) ---- new features are transformed and saved ----' % ctime())
    # persist the learned linear transformation as well
    np.save(args.save_model_to + 'metric.npy', rdim.transformer())
    print('(%s) ---- learned transformer matrix is saved ----' % ctime())
    if args.visualize:
        visualize(n_datamat)
    return rdim
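# A minimal, hypothetical usage sketch for reduce_dim above. The Namespace
# fields mirror the attributes the function reads; the concrete values and
# the data_loader object are assumptions, not part of the original code.

from argparse import Namespace

args = Namespace(data_path='data/landslide.npy',  # directory part is reused for the output file
                 reduce_dim='NCA',
                 save_model_to='models/',
                 visualize=False)
rdim = reduce_dim(args, data_loader)  # data_loader is supplied by the caller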
def test_nca(self):
    self.assertEqual(
        remove_spaces(str(metric_learn.NCA())),
        remove_spaces("NCA(init=None, max_iter=100,"
                      "n_components=None, "
                      "num_dims='deprecated', "
                      "preprocessor=None, random_state=None, "
                      "tol=None, verbose=False)"))
def test_nca(self):
    def_kwargs = {'init': 'auto', 'max_iter': 100, 'n_components': None,
                  'preprocessor': None, 'random_state': None, 'tol': None,
                  'verbose': False}
    nndef_kwargs = {'max_iter': 42}
    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
    self.assertEqual(remove_spaces(str(metric_learn.NCA(max_iter=42))),
                     remove_spaces(f"NCA({merged_kwargs})"))
def test_string_repr(self):
    # we don't test LMNN here because it could be python_LMNN
    self.assertEqual(str(metric_learn.Covariance()), "Covariance()")

    self.assertEqual(str(metric_learn.NCA()),
                     "NCA(learning_rate=0.01, max_iter=100, num_dims=None)")

    self.assertEqual(str(metric_learn.LFDA()),
                     "LFDA(dim=None, k=7, metric='weighted')")

    self.assertEqual(str(metric_learn.ITML()), """
ITML(convergence_threshold=0.001, gamma=1.0, max_iters=1000, verbose=False)
""".strip('\n'))
    self.assertEqual(str(metric_learn.ITML_Supervised()), """
ITML_Supervised(A0=None, bounds=None, convergence_threshold=0.001, gamma=1.0,
        max_iters=1000, num_constraints=None, num_labeled=inf, verbose=False)
""".strip('\n'))

    self.assertEqual(str(metric_learn.LSML()),
                     "LSML(max_iter=1000, tol=0.001, verbose=False)")
    self.assertEqual(str(metric_learn.LSML_Supervised()), """
LSML_Supervised(max_iter=1000, num_constraints=None, num_labeled=inf,
        prior=None, tol=0.001, verbose=False, weights=None)
""".strip('\n'))

    self.assertEqual(str(metric_learn.SDML()), """
SDML(balance_param=0.5, sparsity_param=0.01, use_cov=True, verbose=False)
""".strip('\n'))
    self.assertEqual(str(metric_learn.SDML_Supervised()), """
SDML_Supervised(balance_param=0.5, num_constraints=None, num_labeled=inf,
        sparsity_param=0.01, use_cov=True, verbose=False)
""".strip('\n'))

    self.assertEqual(str(metric_learn.RCA()), "RCA(dim=None)")
    self.assertEqual(str(metric_learn.RCA_Supervised()),
                     "RCA_Supervised(chunk_size=2, dim=None, num_chunks=100)")

    self.assertEqual(str(metric_learn.MLKR()), """
MLKR(A0=None, alpha=0.0001, epsilon=0.01, max_iter=1000, num_dims=None)
""".strip('\n'))
# Neighborhood components analysis aims at "learning" a distance metric
# by finding a linear transformation of input data such that the average
# leave-one-out (LOO) classification performance of a soft-nearest
# neighbors rule is maximized in the transformed space. The key insight
# behind the algorithm is that a matrix :math:`A` corresponding to the
# transformation can be found by defining a differentiable objective
# function for :math:`A`, followed by use of an iterative solver such as
# `scipy.optimize.fmin_l_bfgs_b`. Like LMNN, this algorithm does not try
# to cluster points from the same class into a unique cluster, because it
# enforces conditions at a local neighborhood scale.
#
# - See more in the :ref:`User Guide <nca>`
# - See more in the documentation of the class :py:class:`NCA
#   <metric_learn.NCA>`

nca = metric_learn.NCA(max_iter=1000)
X_nca = nca.fit_transform(X, y)

plot_tsne(X_nca, y)

######################################################################
# Local Fisher Discriminant Analysis
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# LFDA is a linear supervised dimensionality reduction method. It is
# particularly useful when dealing with multimodality, where one or more
# classes consist of separate clusters in input space. The core
# optimization problem of LFDA is solved as a generalized eigenvalue
# problem. Like LMNN and NCA, this algorithm does not try to cluster
# points from the same class into a unique cluster.
#
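# A hedged sketch of how this example plausibly continues: fit LFDA with
# the same fit/transform/plot pattern used for NCA above. The
# hyperparameters k=2 and n_components=2 are assumptions, not taken from
# the snippet.

lfda = metric_learn.LFDA(k=2, n_components=2)
X_lfda = lfda.fit_transform(X, y)

plot_tsne(X_lfda, y)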
def test_nca(self):
    self.assertEqual(str(metric_learn.NCA()),
                     "NCA(max_iter=100, num_dims=None, preprocessor=None, "
                     "tol=None, verbose=False)")
def test_nca(self):
    self.assertEqual(
        str(metric_learn.NCA()),
        "NCA(learning_rate=0.01, max_iter=100, num_dims=None)")
def test_nca(self):
    self.assertEqual(str(metric_learn.NCA()),
                     ("NCA(learning_rate='deprecated', max_iter=100, "
                      "num_dims=None, tol=None,\n verbose=False)"))
CLASSES = {
    'Covariance': metric_learn.Covariance(),
    'ITML_Supervised': metric_learn.ITML_Supervised(num_constraints=200),
    'LFDA': metric_learn.LFDA(k=2, dim=2),
    'LMNN': metric_learn.LMNN(k=5, learn_rate=1e-6, verbose=False),
    'LSML_Supervised': metric_learn.LSML_Supervised(num_constraints=200),
    'MLKR': metric_learn.MLKR(),
    'NCA': metric_learn.NCA(max_iter=700, n_components=2),
    'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, num_chunks=30,
                                                  chunk_size=2),
    'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500)
}


class IrisDataset(object):
    params = [sorted(CLASSES)]
    param_names = ['alg']

    def setup(self, alg):
        iris_data = load_iris()
        self.iris_points = iris_data['data']
        self.iris_labels = iris_data['target']
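    # A sketch of the asv timing method that conventionally follows setup
    # in a benchmark class like this one; the seed value and method body
    # are assumptions based on the asv convention, not from the snippet.
    def time_fit(self, alg):
        np.random.seed(5555)  # fixed seed keeps timed fits comparable
        CLASSES[alg].fit(self.iris_points, self.iris_labels)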
rank_accuracies, mAP = evaluate_metric(X_test_pca.T, y_test,
                                       X_test_pca.T, y_test,
                                       metric='mahalanobis',
                                       parameters=M)
rank_accuracies_l_2.append(rank_accuracies)
mAP_l_2.append(mAP)
metric_l_2.append('Learnt LMNN')

nca = metric_learn.NCA(max_iter=10)
nca.fit(X_train_pca, y_train.T)
N = nca.metric()  # learned Mahalanobis matrix (older metric-learn API)
print('Metric learnt-NCA')
rank_accuracies, mAP = evaluate_metric(X_test_pca.T, y_test,
                                       X_test_pca.T, y_test,
                                       metric='mahalanobis',
                                       parameters=N)
rank_accuracies_l_2.append(rank_accuracies)
mAP_l_2.append(mAP)
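# On current metric-learn releases (>= 0.5) the learned matrix is exposed
# via get_mahalanobis_matrix() instead of metric(); a minimal equivalent
# sketch, assuming the same training data as above:

nca = metric_learn.NCA(max_iter=10)
nca.fit(X_train_pca, y_train.T)
N = nca.get_mahalanobis_matrix()  # replaces the deprecated nca.metric()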
import numpy as np
from sklearn.datasets import load_iris

import metric_learn

CLASSES = {
    'Covariance': metric_learn.Covariance(),
    'ITML_Supervised': metric_learn.ITML_Supervised(num_constraints=200),
    'LFDA': metric_learn.LFDA(k=2, dim=2),
    'LMNN': metric_learn.LMNN(k=5, learn_rate=1e-6, verbose=False),
    'LSML_Supervised': metric_learn.LSML_Supervised(num_constraints=200),
    'MLKR': metric_learn.MLKR(),
    'NCA': metric_learn.NCA(max_iter=700, num_dims=2),
    'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, num_chunks=30,
                                                  chunk_size=2),
    'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500),
}

try:
    from metric_learn.lmnn import python_LMNN
    if python_LMNN is not metric_learn.LMNN:
        CLASSES['python_LMNN'] = python_LMNN(k=5, learn_rate=1e-6,
                                             verbose=False)
except ImportError:
    pass


class IrisDataset(object):
    params = [sorted(CLASSES)]
    param_names = ['alg']

    def setup(self, alg):
        # body completed from the identical benchmark snippet above
        iris_data = load_iris()
        self.iris_points = iris_data['data']
        self.iris_labels = iris_data['target']
def nca(self, train_X, train_y, test_X, dims):
    learner = ml.NCA(num_dims=dims)
    train_X = learner.fit_transform(train_X, train_y)
    test_X = learner.transform(test_X)
    return train_X, test_X
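# Hypothetical call site for the nca helper above; `reducer` (the owning
# object), the train/test arrays, and the downstream classifier are
# illustrative assumptions, not part of the original code.

train_2d, test_2d = reducer.nca(X_train, y_train, X_test, dims=2)
clf.fit(train_2d, y_train)   # any sklearn-style classifier on reduced data
accuracy = clf.score(test_2d, y_test)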
def test_nca(self):
    self.assertEqual(remove_spaces(str(metric_learn.NCA(max_iter=42))),
                     remove_spaces("NCA(max_iter=42)"))
CLASSES = {
    'Covariance': metric_learn.Covariance(),
    'ITML_Supervised': metric_learn.ITML_Supervised(num_constraints=200),
    'LFDA': metric_learn.LFDA(k=2, dim=2),
    'LMNN': metric_learn.LMNN(k=5, learn_rate=1e-6, verbose=False),
    'LSML_Supervised': metric_learn.LSML_Supervised(num_constraints=200),
    'MLKR': metric_learn.MLKR(),
    'NCA': metric_learn.NCA(max_iter=700, learning_rate=0.01, num_dims=2),
    'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, num_chunks=30,
                                                  chunk_size=2),
    'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500),
}

try:
    from metric_learn.lmnn import python_LMNN
    if python_LMNN is not metric_learn.LMNN:
        CLASSES['python_LMNN'] = python_LMNN(k=5, learn_rate=1e-6,
                                             verbose=False)
except ImportError:
    pass