def test_sdml_supervised(self):
    """Check that ``fit`` followed by ``transform`` matches ``fit_transform``."""
    # Two-step path: fit the model, then transform.
    rng = np.random.RandomState(1234)
    two_step = SDML_Supervised(num_constraints=1500)
    two_step.fit(self.X, self.y, random_state=rng)
    expected = two_step.transform()

    # One-step path, re-seeded with the same value as the first run.
    rng = np.random.RandomState(1234)
    one_step = SDML_Supervised(num_constraints=1500)
    actual = one_step.fit_transform(self.X, self.y, random_state=rng)

    assert_array_almost_equal(expected, actual)
def test_sdml_supervised(self):
    """Check that ``fit`` + ``transform(X)`` matches ``fit_transform``."""
    # Two-step path: fit the model, then transform the same data.
    rng = np.random.RandomState(1234)
    two_step = SDML_Supervised(
        num_constraints=1500, balance_param=1e-5, use_cov=False)
    two_step.fit(self.X, self.y, random_state=rng)
    expected = two_step.transform(self.X)

    # One-step path, re-seeded with the same value as the first run.
    rng = np.random.RandomState(1234)
    one_step = SDML_Supervised(
        num_constraints=1500, balance_param=1e-5, use_cov=False)
    actual = one_step.fit_transform(self.X, self.y, random_state=rng)

    assert_array_almost_equal(expected, actual)
def test_sdml_supervised(self):
    """Check that ``fit`` + ``transform(X)`` matches ``fit_transform``.

    The random state is passed to the estimator's constructor here.
    """
    # Two-step path: fit the model, then transform the same data.
    rng = np.random.RandomState(1234)
    two_step = SDML_Supervised(n_constraints=1500, balance_param=1e-5,
                               prior='identity', random_state=rng)
    two_step.fit(self.X, self.y)
    expected = two_step.transform(self.X)

    # One-step path, re-seeded with the same value as the first run.
    rng = np.random.RandomState(1234)
    one_step = SDML_Supervised(n_constraints=1500, balance_param=1e-5,
                               prior='identity', random_state=rng)
    actual = one_step.fit_transform(self.X, self.y)

    assert_array_almost_equal(expected, actual)
def test_sdml_supervised(self):
    """Check that ``L.T @ L`` equals the matrix from ``get_mahalanobis_matrix``."""
    rng = np.random.RandomState(1234)
    model = SDML_Supervised(
        num_constraints=1500, use_cov=False, balance_param=1e-5)
    model.fit(self.X, self.y, random_state=rng)

    # Rebuild the metric from the learned linear map and compare.
    components = model.transformer_
    rebuilt = components.T.dot(components)
    assert_array_almost_equal(rebuilt, model.get_mahalanobis_matrix())
def get_dist_func(
    data: Array[np.float64],
    target: Array[np.float64],
) -> Callable[[Callable[[np.float64, np.float64], np.float64], int, int],
              np.float64]:
    """
    Get function that returns distances between examples in learned space.

    Args:
        data : Array[np.float64] - training data
        target : Array[np.float64] - target variable values (classes of
            training examples)
    Returns:
        Callable[[Callable[[np.float64, np.float64], np.float64], int, int],
        np.float64] -- function that takes a metric function and two indices
        of training examples and returns the distance between those examples
        in the learned metric space.
    """

    # Standardize the data, then fit SDML and get the transformed data.
    data_trans: Array[np.float64] = SDML_Supervised().fit_transform(
        StandardScaler().fit_transform(data), target)

    # Computing distance. Indices are annotated with the builtin ``int``:
    # the ``np.int`` alias was removed in NumPy 1.24 and, because annotations
    # are evaluated when the function is defined, it raised AttributeError.
    def dist_func_res(
        metric: Callable[[np.float64, np.float64], np.float64],
        i1: int,
        i2: int,
    ) -> np.float64:
        """
        Distance function that takes a metric and indices of examples in the
        training set and returns their distance in the learned space.

        Args:
            metric : callable - distance metric applied to the two
                transformed examples
            i1 : int - index of first training example
            i2 : int - index of second training example
        Returns:
            np.float64 - distance in learned metric space using specified
            metric between specified training examples.
        """

        # Compute distance in learned metric space using specified metric.
        return metric(data_trans[i1, :], data_trans[i2, :])

    return dist_func_res  # Return distance function.
def sandwich_demo():
    """Plot the sandwich data in input space and after each metric learner.

    The top row of a 3-row figure shows the raw data with its 2-nearest-
    neighbour graph; the four panels below show the same data after
    LMNN, ITML, SDML and LSML.
    """
    x, y = sandwich_data()
    input_knn = nearest_neighbors(x, k=2)

    # Top row: a 3x1 grid cell spanning the full figure width.
    top_ax = plt.subplot(3, 1, 1)
    plot_sandwich_data(x, y, top_ax)
    plot_neighborhood_graph(x, input_knn, y, top_ax)
    top_ax.set_title('input space')
    top_ax.set_aspect('equal')
    top_ax.set_xticks([])
    top_ax.set_yticks([])

    learners = [
        LMNN(),
        ITML_Supervised(num_constraints=200),
        SDML_Supervised(num_constraints=200),
        LSML_Supervised(num_constraints=200),
    ]

    # Panels are numbered from 3 in the 3x2 grid.
    for slot, learner in enumerate(learners, start=3):
        learner.fit(x, y)
        transformed = learner.transform()
        learned_knn = nearest_neighbors(transformed, k=2)

        panel = plt.subplot(3, 2, slot)
        plot_sandwich_data(transformed, y, axis=panel)
        plot_neighborhood_graph(transformed, learned_knn, y, axis=panel)
        panel.set_title(learner.__class__.__name__)
        panel.set_xticks([])
        panel.set_yticks([])

    plt.show()
def test_sdml_supervised(self):
    """Check that ``L.T @ L`` equals the matrix from ``get_mahalanobis_matrix``."""
    rng = np.random.RandomState(1234)
    model = SDML_Supervised(num_constraints=1500, prior='identity',
                            balance_param=1e-5, random_state=rng)
    model.fit(self.X, self.y)

    # Rebuild the metric from the learned components and compare.
    components = model.components_
    rebuilt = components.T.dot(components)
    assert_array_almost_equal(rebuilt, model.get_mahalanobis_matrix())
def test_sdml_works_on_non_spd_pb_with_skggm(self):
    """SDML should fit a known non-SPD problem (iris) without raising.

    This is a case where SDML is expected to work even though
    scikit-learn's graphical_lasso does not.
    """
    features, labels = load_iris(return_X_y=True)
    model = SDML_Supervised(balance_param=0.5, sparsity_param=0.01,
                            use_cov=True)
    # The test passes as long as fitting does not raise.
    model.fit(features, labels)
def process_sdml(self, **option):
    """Metric Learning algorithm: SDML.

    Fits ``SDML_Supervised(**option)`` on the training expression data and
    labels, then stores the result of ``transformer()`` in
    ``self.Trans['SDML']``.
    """
    train_x, train_y = self.GeneExp_train, self.Label_train
    learner = SDML_Supervised(**option)
    learner.fit(train_x, train_y)
    self.Trans['SDML'] = learner.transformer()
def test_iris(self):
    """Class separation on iris after SDML must be below 0.25."""
    # Note: this is a flaky test, which fails for certain seeds.
    # TODO: un-flake it!
    rng = np.random.RandomState(5555)

    model = SDML_Supervised(num_constraints=1500)
    model.fit(self.iris_points, self.iris_labels, random_state=rng)

    separation = class_separation(model.transform(), self.iris_labels)
    self.assertLess(separation, 0.25)
def test_verbose_has_not_installed_skggm_sdml_supervised(capsys):
    # Test that if users have NOT installed skggm, a message is printed
    # telling them scikit-learn's graphical lasso solver is used (when they
    # use SDML_Supervised)
    # TODO: remove if we don't need skggm anymore
    features, labels = make_classification(random_state=42)
    model = SDML_Supervised(verbose=True, balance_param=1e-5, use_cov=False)
    model.fit(features, labels)

    # Only stdout is inspected; stderr is discarded.
    captured_out, _ = capsys.readouterr()
    expected = "SDML will use scikit-learn's graphical lasso solver."
    assert expected in captured_out
def test_verbose_has_installed_skggm_sdml_supervised(capsys):
    # Test that if users have installed skggm, a message is printed telling
    # them skggm's solver is used (when they use SDML_Supervised)
    # TODO: remove if we don't need skggm anymore
    features, labels = load_iris(return_X_y=True)
    model = SDML_Supervised(verbose=True, prior='identity',
                            balance_param=1e-5)
    model.fit(features, labels)

    # Only stdout is inspected; stderr is discarded.
    captured_out, _ = capsys.readouterr()
    expected = "SDML will use skggm's graphical lasso solver."
    assert expected in captured_out
def test_deprecation(self):
    # Test that the right deprecation message is thrown when `num_labeled`
    # is set at initialization.
    # TODO: remove in v.0.5
    # NOTE(review): the expected message below mentions removal in 0.6.0,
    # which disagrees with the TODO above -- confirm the intended version.
    features = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    labels = np.array([1, 0, 1, 0])
    estimator = SDML_Supervised(num_labeled=np.inf)

    # The literal is kept exactly as-is (including the missing space
    # between "be" and "removed"), since the assertion matches on it.
    expected_msg = ('"num_labeled" parameter is not used.'
                    ' It has been deprecated in version 0.5.0 and will be'
                    'removed in 0.6.0')
    assert_warns_message(DeprecationWarning, expected_msg,
                         estimator.fit, features, labels)
def test_deprecation_num_labeled(self):
    # Test that a deprecation message is thrown if num_labeled is set at
    # initialization
    # TODO: remove in v.0.6
    features, labels = make_classification(random_state=42)
    estimator = SDML_Supervised(num_labeled=np.inf, use_cov=False,
                                balance_param=5e-5)

    # The literal is kept exactly as-is (including the missing space
    # between "be" and "removed"), since the assertion matches on it.
    expected_msg = ('"num_labeled" parameter is not used.'
                    ' It has been deprecated in version 0.5.0 and will be'
                    'removed in 0.6.0')
    assert_warns_message(DeprecationWarning, expected_msg,
                         estimator.fit, features, labels)
def test_raises_no_warning_installed_skggm(self):
    """With skggm installed, fitting SDML / SDML_Supervised emits no warning.

    Warnings are recorded with ``warnings.catch_warnings(record=True)``
    rather than ``pytest.warns(None)``: the latter was deprecated in
    pytest 7 and raises ``TypeError`` in pytest 8.
    """
    # Local import so the block does not depend on the file's import list.
    import warnings

    # otherwise we should be able to instantiate and fit SDML and it
    # should raise no warning
    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
    y_pairs = [1, -1]
    X, y = make_classification(random_state=42)

    with warnings.catch_warnings(record=True) as record:
        # Record every warning, even ones already shown once elsewhere.
        warnings.simplefilter("always")
        sdml = SDML()
        sdml.fit(pairs, y_pairs)
    assert len(record) == 0

    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        sdml = SDML_Supervised(use_cov=False, balance_param=1e-5)
        sdml.fit(X, y)
    assert len(record) == 0
def test_raises_no_warning_installed_skggm(self):
    """With skggm installed, fitting raises no error and no ConvergenceWarning.

    Warnings are recorded with ``warnings.catch_warnings(record=True)``
    rather than ``pytest.warns(None)``: the latter was deprecated in
    pytest 7 and raises ``TypeError`` in pytest 8.
    """
    # Local import so the block does not depend on the file's import list.
    import warnings

    # otherwise we should be able to instantiate and fit SDML and it
    # should raise no error and no ConvergenceWarning
    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
    y_pairs = [1, -1]
    X, y = make_classification(random_state=42)

    with warnings.catch_warnings(record=True) as records:
        # Record every warning, even ones already shown once elsewhere.
        warnings.simplefilter("always")
        sdml = SDML(prior='covariance')
        sdml.fit(pairs, y_pairs)
    # Other warning categories are tolerated; only ConvergenceWarning fails.
    for record in records:
        assert record.category is not ConvergenceWarning

    with warnings.catch_warnings(record=True) as records:
        warnings.simplefilter("always")
        sdml_supervised = SDML_Supervised(prior='identity',
                                          balance_param=1e-5)
        sdml_supervised.fit(X, y)
    for record in records:
        assert record.category is not ConvergenceWarning
def test_sdml_supervised_raises_warning_msg_not_installed_skggm(self):
    """Tests that the right error message is raised if someone tries to
    use SDML_Supervised but has not installed skggm, and that the algorithm
    fails to converge"""
    # TODO: remove if we don't need skggm anymore
    # load_iris: dataset where we know scikit-learn's graphical lasso fails
    # with a Floating Point error
    features, labels = load_iris(return_X_y=True)
    estimator = SDML_Supervised(balance_param=0.5, sparsity_param=0.01)
    expected_prefix = (
        "There was a problem in SDML when using scikit-learn's graphical "
        "lasso solver. skggm's graphical lasso can sometimes converge on "
        "non SPD cases where scikit-learn's graphical lasso fails to "
        "converge. Try to install skggm and rerun the algorithm (see "
        "the README.md for the right version of skggm). The following "
        "error message was thrown:")

    with pytest.raises(RuntimeError) as raised_error:
        estimator.fit(features, labels)

    # Only the prefix is checked; the underlying solver error follows it.
    actual_message = str(raised_error.value)
    assert actual_message.startswith(expected_prefix)
def test_sdml_supervised_raises_warning_msg_installed_skggm(self):
    """Tests that the right error message is raised if someone tries to
    use SDML_Supervised with skggm installed, and that the algorithm fails
    to converge"""
    # TODO: remove if we don't need skggm anymore
    # case on which we know that skggm's graphical lasso fails
    # because it will return non finite values
    rng = np.random.RandomState(42)

    # This example will create a diagonal em_cov with a negative coeff (
    # pathological case)
    features = np.array([[-10., 0.], [10., 0.], [5., 0.], [3., 0.]])
    labels = [0, 0, 1, 1]
    estimator = SDML_Supervised(balance_param=0.5, prior='identity',
                                sparsity_param=0.01, random_state=rng)
    expected_message = ("There was a problem in SDML when using skggm's "
                        "graphical lasso solver.")

    with pytest.raises(RuntimeError) as raised_error:
        estimator.fit(features, labels)

    # The whole message must match exactly, not just a prefix.
    assert expected_message == str(raised_error.value)
(MMC(max_iter=2), build_pairs), # max_iter=2 to be faster (SDML(prior='identity', balance_param=1e-5), build_pairs)] ids_pairs_learners = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in pairs_learners])) classifiers = [(Covariance(), build_classification), (LFDA(), build_classification), (LMNN(), build_classification), (NCA(), build_classification), (RCA(), build_classification), (ITML_Supervised(max_iter=5), build_classification), (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), (RCA_Supervised(num_chunks=5), build_classification), (SDML_Supervised(prior='identity', balance_param=1e-5), build_classification)] ids_classifiers = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in classifiers])) regressors = [(MLKR(init='pca'), build_regression)] ids_regressors = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in regressors])) WeaklySupervisedClasses = (_PairsClassifierMixin, _QuadrupletsClassifierMixin) tuples_learners = pairs_learners + quadruplets_learners ids_tuples_learners = ids_pairs_learners + ids_quadruplets_learners
(MMC(max_iter=2), build_pairs), # max_iter=2 for faster (SDML(use_cov=False, balance_param=1e-5), build_pairs)] ids_pairs_learners = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in pairs_learners])) classifiers = [(Covariance(), build_classification), (LFDA(), build_classification), (LMNN(), build_classification), (NCA(), build_classification), (RCA(), build_classification), (ITML_Supervised(max_iter=5), build_classification), (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), (RCA_Supervised(num_chunks=10), build_classification), (SDML_Supervised(use_cov=False, balance_param=1e-5), build_classification)] ids_classifiers = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in classifiers])) regressors = [(MLKR(), build_regression)] ids_regressors = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in regressors])) WeaklySupervisedClasses = (_PairsClassifierMixin, _QuadrupletsClassifierMixin) tuples_learners = pairs_learners + quadruplets_learners ids_tuples_learners = ids_pairs_learners + ids_quadruplets_learners
(ITML(max_iter=2), build_pairs), # max_iter=2 to be faster (MMC(max_iter=2), build_pairs), # max_iter=2 to be faster (SDML(use_cov=False, balance_param=1e-5), build_pairs) ] ids_pairs_learners = list( map(lambda x: x.__class__.__name__, [learner for (learner, _) in pairs_learners])) classifiers = [(Covariance(), build_classification), (LFDA(), build_classification), (LMNN(), build_classification), (NCA(), build_classification), (RCA(), build_classification), (ITML_Supervised(max_iter=5), build_classification), (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), (RCA_Supervised(num_chunks=10), build_classification), (SDML_Supervised(use_cov=False, balance_param=1e-5), build_classification)] ids_classifiers = list( map(lambda x: x.__class__.__name__, [learner for (learner, _) in classifiers])) regressors = [(MLKR(), build_regression)] ids_regressors = list( map(lambda x: x.__class__.__name__, [learner for (learner, _) in regressors])) WeaklySupervisedClasses = (_PairsClassifierMixin, _QuadrupletsClassifierMixin) tuples_learners = pairs_learners + quadruplets_learners ids_tuples_learners = ids_pairs_learners + ids_quadruplets_learners supervised_learners = classifiers + regressors
def __init__(self):
    """Create the SDML metric learner and empty data slots."""
    # Supervised SDML configured with 200 constraints.
    self.metric_model = SDML_Supervised(num_constraints=200)
    # Data slots start unset; nothing in this method populates them.
    self.X_tr = self.y_train = self.X_te = None
(ITML(max_iter=2), build_pairs), # max_iter=2 to be faster (MMC(max_iter=2), build_pairs), # max_iter=2 to be faster (SDML(prior='identity', balance_param=1e-5), build_pairs) ] ids_pairs_learners = list( map(lambda x: x.__class__.__name__, [learner for (learner, _) in pairs_learners])) classifiers = [(Covariance(), build_classification), (LFDA(), build_classification), (LMNN(), build_classification), (NCA(), build_classification), (RCA(), build_classification), (ITML_Supervised(max_iter=5), build_classification), (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), (RCA_Supervised(num_chunks=5), build_classification), (SDML_Supervised(prior='identity', balance_param=1e-5), build_classification)] ids_classifiers = list( map(lambda x: x.__class__.__name__, [learner for (learner, _) in classifiers])) regressors = [(MLKR(init='pca'), build_regression)] ids_regressors = list( map(lambda x: x.__class__.__name__, [learner for (learner, _) in regressors])) WeaklySupervisedClasses = (_PairsClassifierMixin, _QuadrupletsClassifierMixin) tuples_learners = pairs_learners + quadruplets_learners ids_tuples_learners = ids_pairs_learners + ids_quadruplets_learners supervised_learners = classifiers + regressors
def test_sdml_supervised(self):
    """Check that ``L.T @ L`` equals the matrix returned by ``metric()``."""
    rng = np.random.RandomState(1234)
    model = SDML_Supervised(num_constraints=1500)
    model.fit(self.X, self.y, random_state=rng)

    # Rebuild the metric from the learned linear map and compare.
    components = model.transformer_
    rebuilt = components.T.dot(components)
    assert_array_almost_equal(rebuilt, model.metric())
(ITML(), build_pairs), (MMC(max_iter=2), build_pairs), # max_iter=2 for faster (SDML(), build_pairs), ] ids_pairs_learners = list( map(lambda x: x.__class__.__name__, [learner for (learner, _) in pairs_learners])) classifiers = [(Covariance(), build_classification), (LFDA(), build_classification), (LMNN(), build_classification), (NCA(), build_classification), (RCA(), build_classification), (ITML_Supervised(max_iter=5), build_classification), (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), (RCA_Supervised(num_chunks=10), build_classification), (SDML_Supervised(), build_classification)] ids_classifiers = list( map(lambda x: x.__class__.__name__, [learner for (learner, _) in classifiers])) regressors = [(MLKR(), build_regression)] ids_regressors = list( map(lambda x: x.__class__.__name__, [learner for (learner, _) in regressors])) WeaklySupervisedClasses = (_PairsClassifierMixin, _QuadrupletsClassifierMixin) tuples_learners = pairs_learners + quadruplets_learners ids_tuples_learners = ids_pairs_learners + ids_quadruplets_learners supervised_learners = classifiers + regressors
print("Method: NCA", '\n') #print('Max', TrainData.max(axis=0)) #print('sssssssss', len(TrainData[0])) #print('sssssssss', len(TrainData.max(axis=0))) #print('Min', TrainData.min(axis=0)) nca = NCA(max_iter=500, learning_rate=0.01) # print('ssssssss', TrainData) x = nca.fit(FSTrainData, TrainLabels) TFSTestData = x.transform(FSTestData) print('Transformation Done', '\n') elif Method == 'SDML': print("Method: SDML", '\n') sdml = SDML_Supervised(num_constraints=200) x = sdml.fit(FSTrainData, TrainLabels) TFSTestData = x.transform(FSTestData) print('Transformation Done', '\n') elif Method == 'RCA': print("Method: RCA", '\n') rca = RCA_Supervised(num_chunks=2, chunk_size=1) x = rca.fit(FSTrainData, TrainLabels) TFSTestData = x.transform(FSTestData) print('Transformation Done', '\n') #print(len(TTestData)) #print(TTestData[0]) #rca = RCA_Supervised(num_chunks=2, chunk_size=1)