def sdml_fit(samples, similarity_set, prior='covariance', balance_param=0.15):
    """Fit an SDML model on every ordered index pair of ``similarity_set``.

    For each ``(i, j)`` with ``i`` and ``j`` in
    ``range(len(similarity_set))`` the index pair ``[i, j]`` is labelled
    ``1`` when ``similarity_set[i, j]`` is truthy and ``-1`` otherwise.
    ``samples`` is handed to SDML as its ``preprocessor``, so the pairs
    passed to ``fit`` are index pairs rather than raw points.

    ``prior`` can be 'covariance', 'identity' or 'random'.

    ``balance_param`` was 0.5 in the first version of the paper, but such a
    large value does not work here.

    The elapsed fitting time is printed, and the fitted SDML instance is
    returned.
    """
    n_samples = len(similarity_set)
    sdml = SDML(prior=prior, preprocessor=samples, verbose=True,
                balance_param=balance_param)

    # All ordered pairs, self-pairs included, in row-major order.
    index_pairs = [[row, col]
                   for row in range(n_samples)
                   for col in range(n_samples)]
    labels = [1 if similarity_set[row, col] else -1
              for row, col in index_pairs]

    tic = time()
    sdml.fit(index_pairs, labels)
    print("Fitting took {:.2f} seconds.".format(time() - tic))
    return sdml
def test_sdml_converges_if_psd(self):
    """Check that SDML yields a finite Mahalanobis matrix on a toy problem.

    Per the original author's note, this is a simple problem whose
    pseudo-covariance matrix is known to be PSD.
    """
    toy_pairs = np.array([[[-10., 0.], [10., 0.]],
                          [[0., -55.], [0., -60]]])
    toy_labels = [1, -1]

    model = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5)
    model.fit(toy_pairs, toy_labels)

    learned_matrix = model.get_mahalanobis_matrix()
    assert np.isfinite(learned_matrix).all()
def test_verbose_has_not_installed_skggm_sdml(capsys):
    # Check that fitting a verbose SDML prints the message announcing that
    # scikit-learn's graphical lasso solver is used (going by the test name,
    # this is the path expected when skggm is not installed).
    # TODO: remove if we don't need skggm anymore
    toy_pairs = np.array([[[-10., 0.], [10., 0.]],
                          [[0., -55.], [0., -60]]])
    toy_labels = [1, -1]

    model = SDML(verbose=True)
    model.fit(toy_pairs, toy_labels)

    # readouterr() returns (out, err); only the captured stdout matters here.
    stdout_text = capsys.readouterr()[0]
    assert "SDML will use scikit-learn's graphical lasso solver." in stdout_text
def test_raises_no_warning_installed_skggm(self):
    """Check that fitting SDML and SDML_Supervised emits no warning at all.

    Warnings are recorded with ``warnings.catch_warnings(record=True)``
    instead of ``pytest.warns(None)``: the latter is deprecated since
    pytest 7 and rejected by pytest 8. ``simplefilter("always")`` mirrors
    what the pytest recorder did, so no warning is hidden by an existing
    filter or by the "already shown once" registry.
    """
    # Local import so this block does not depend on the file-level imports.
    import warnings

    # otherwise we should be able to instantiate and fit SDML and it
    # should raise no warning
    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
    y_pairs = [1, -1]
    X, y = make_classification(random_state=42)

    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        sdml = SDML()
        sdml.fit(pairs, y_pairs)
    assert len(record) == 0

    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        sdml = SDML_Supervised(use_cov=False, balance_param=1e-5)
        sdml.fit(X, y)
    assert len(record) == 0
def test_sdml_raises_warning_msg_installed_skggm(self):
    """Check the error raised when the skggm solver fails inside SDML.

    Fitting is expected to raise a RuntimeError whose text is exactly the
    "problem ... when using skggm's graphical lasso solver" message.
    """
    # TODO: remove if we don't need skggm anymore
    # case on which we know that skggm's graphical lasso fails
    # because it will return non finite values
    toy_pairs = np.array([[[-10., 0.], [10., 0.]],
                          [[0., 50.], [0., -60]]])
    toy_labels = [1, -1]
    model = SDML(use_cov=False, balance_param=100, verbose=True)

    expected_msg = ("There was a problem in SDML when using skggm's graphical "
                    "lasso solver.")
    with pytest.raises(RuntimeError) as excinfo:
        model.fit(toy_pairs, toy_labels)

    # Compare outside the `with`, once the exception has been captured.
    assert expected_msg == str(excinfo.value)
def test_raises_no_warning_installed_skggm(self):
    """Check that SDML and SDML_Supervised fit without a ConvergenceWarning.

    Other warning categories are tolerated; only a recorded warning whose
    category is exactly ``ConvergenceWarning`` fails the test.

    Warnings are recorded with ``warnings.catch_warnings(record=True)``
    instead of ``pytest.warns(None)``: the latter is deprecated since
    pytest 7 and rejected by pytest 8. ``simplefilter("always")`` mirrors
    what the pytest recorder did, so no warning is hidden by an existing
    filter.
    """
    # Local import so this block does not depend on the file-level imports.
    import warnings

    # otherwise we should be able to instantiate and fit SDML and it
    # should raise no error and no ConvergenceWarning
    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
    y_pairs = [1, -1]
    X, y = make_classification(random_state=42)

    with warnings.catch_warnings(record=True) as records:
        warnings.simplefilter("always")
        sdml = SDML(prior='covariance')
        sdml.fit(pairs, y_pairs)
    for record in records:
        assert record.category is not ConvergenceWarning

    with warnings.catch_warnings(record=True) as records:
        warnings.simplefilter("always")
        sdml_supervised = SDML_Supervised(prior='identity', balance_param=1e-5)
        sdml_supervised.fit(X, y)
    for record in records:
        assert record.category is not ConvergenceWarning
def test_sdml_raises_warning_non_psd(self):
    """Check that SDML emits the non-PSD ConvergenceWarning on a toy case.

    Per the original author's note, this toy example is one where the
    pseudo-covariance matrix is known not to be PSD.
    """
    toy_pairs = np.array([[[-10., 0.], [10., 0.]],
                          [[0., 50.], [0., -60]]])
    toy_labels = [1, -1]
    model = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5)

    expected_msg = ("Warning, the input matrix of graphical lasso is not "
                    "positive semi-definite (PSD). The algorithm may diverge, "
                    "and lead to degenerate solutions. "
                    "To prevent that, try to decrease the balance parameter "
                    "`balance_param` and/or to set use_cov=False.")

    with pytest.warns(ConvergenceWarning) as caught:
        # The fit itself may fail on this degenerate input; only the warning
        # emitted along the way is under test, so any error is ignored.
        try:
            model.fit(toy_pairs, toy_labels)
        except Exception:
            pass

    # The expected text must match one of the warnings raised by the
    # estimator (others may have been recorded as well).
    seen_messages = [str(warning.message) for warning in caught]
    assert expected_msg in seen_messages
def test_sdml_raises_warning_msg_not_installed_skggm(self):
    """Check the error raised when scikit-learn's solver fails inside SDML.

    Fitting with ``prior='identity'`` and ``balance_param=100`` is expected
    to raise a RuntimeError whose text is exactly the message advising the
    user to install skggm.
    """
    # TODO: remove if we don't need skggm anymore
    # case on which we know that scikit-learn's graphical lasso fails
    # because it will return a non SPD matrix
    toy_pairs = np.array([[[-10., 0.], [10., 0.]],
                          [[0., 50.], [0., -60]]])
    toy_labels = [1, -1]
    model = SDML(prior='identity', balance_param=100, verbose=True)

    expected_msg = ("There was a problem in SDML when using scikit-learn's graphical "
                    "lasso solver. skggm's graphical lasso can sometimes converge on "
                    "non SPD cases where scikit-learn's graphical lasso fails to "
                    "converge. Try to install skggm and rerun the algorithm (see "
                    "the README.md for the right version of skggm).")
    with pytest.raises(RuntimeError) as excinfo:
        model.fit(toy_pairs, toy_labels)

    # Compare outside the `with`, once the exception has been captured.
    assert expected_msg == str(excinfo.value)
def test_sdml_raises_warning_msg_not_installed_skggm(self):
    """Check the error raised when scikit-learn's solver fails inside SDML.

    Fitting with ``use_cov=False`` and ``balance_param=100`` is expected to
    raise a RuntimeError whose text is exactly the message advising the
    user to install skggm.
    """
    # TODO: remove if we don't need skggm anymore
    # case on which we know that scikit-learn's graphical lasso fails
    # because it will return a non SPD matrix
    toy_pairs = np.array([[[-10., 0.], [10., 0.]],
                          [[0., 50.], [0., -60]]])
    toy_labels = [1, -1]
    model = SDML(use_cov=False, balance_param=100, verbose=True)

    expected_msg = ("There was a problem in SDML when using scikit-learn's graphical "
                    "lasso solver. skggm's graphical lasso can sometimes converge on "
                    "non SPD cases where scikit-learn's graphical lasso fails to "
                    "converge. Try to install skggm and rerun the algorithm (see "
                    "the README.md for the right version of skggm).")
    with pytest.raises(RuntimeError) as excinfo:
        model.fit(toy_pairs, toy_labels)

    # Compare outside the `with`, once the exception has been captured.
    assert expected_msg == str(excinfo.value)
def test_tiwafer():
    """Learn an SDML metric on the tiwafer data and pickle the results.

    Steps, as performed below:
      1. load the dataset with ``load_data_sdml()``;
      2. build the constraint matrix ``W`` with ``prepare_constraints_old``
         from the labels, the number of points and ``num_constraints``;
      3. fit ``SDML`` on the data and ``W``;
      4. pickle ``sdml.metric()`` to ``W_metric_sdml.p`` and
         ``sdml.transformer()`` to ``W_trans_sdml.p`` in the current
         working directory.

    Both output files are written inside ``with`` blocks so the handles are
    closed even if pickling fails. ``print(...)`` is called with a single
    string, which prints the same text on Python 2 and Python 3.
    """
    num_constraints = 1500

    print("Loading Data....")
    tiwafer_data = load_data_sdml()
    sorted_ids = tiwafer_data.sortedIds
    ti_data = np.array(tiwafer_data.data)
    labels = np.array(tiwafer_data.target)
    print("Done Loading Data.\nLearning Distance Metric....")

    num_points = len(sorted_ids)
    W = prepare_constraints_old(labels, num_points, num_constraints)
    # NOTE: an alternative was previously sketched here as
    # prepare_constraints(sorted_ids, tiwafer_data.sim_pairs,
    # tiwafer_data.diff_pairs); it is not used.

    sdml = SDML()
    sdml.fit(ti_data, W)

    W_metric = sdml.metric()
    with open('W_metric_sdml.p', 'wb') as handle:
        cPickle.dump(W_metric, handle)

    W_trans = sdml.transformer()
    with open('W_trans_sdml.p', 'wb') as handle:
        cPickle.dump(W_trans, handle)