Example No. 1
def sdml_fit(samples, similarity_set, prior='covariance', balance_param=0.15):
    """Prior can be 'covariance', 'identity' or 'random'. 
    balance_param was used 0.5 in the first version of the paper, but it does not work here with such a large value. """

    n_samples = len(similarity_set)

    sdml = SDML(prior=prior,
                preprocessor=samples,
                verbose=True,
                balance_param=balance_param)

    # Label every ordered pair of sample indices (including self-pairs):
    # +1 if the similarity matrix marks them as similar, -1 otherwise.
    pairs, Y = [], []
    for ind1 in range(n_samples):
        for ind2 in range(n_samples):
            pairs.append([ind1, ind2])
            if similarity_set[ind1, ind2]:
                Y.append(1)
            else:
                Y.append(-1)

    start = time()
    sdml.fit(pairs, Y)
    print("Fitting took {:.2f} seconds.".format(time() - start))

    return sdml
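
For context, here is a minimal, hypothetical usage sketch for the helper above. It assumes metric_learn's SDML and NumPy (the same dependencies sdml_fit relies on), and the toy samples/labels arrays are invented purely for illustration; the identity prior and a small balance_param are chosen only to keep the toy problem well behaved.

import numpy as np
from time import time          # used inside sdml_fit
from metric_learn import SDML  # used inside sdml_fit

# Invented toy data: six 2-D points in two well-separated clusters.
samples = np.array([[0.0, 0.0], [0.1, 0.2], [0.2, 0.1],
                    [5.0, 5.0], [5.1, 4.9], [4.9, 5.2]])
labels = np.array([0, 0, 0, 1, 1, 1])

# Boolean similarity matrix: True where two points share a label.
similarity_set = labels[:, None] == labels[None, :]

model = sdml_fit(samples, similarity_set, prior='identity', balance_param=0.01)
print(model.get_mahalanobis_matrix())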
Example No. 2
 def test_sdml_converges_if_psd(self):
   """Tests that sdml converges on a simple problem where we know the
   pseudo-covariance matrix is PSD"""
   pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
   y = [1, -1]
   sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5)
   sdml.fit(pairs, y)
   assert np.isfinite(sdml.get_mahalanobis_matrix()).all()
Example No. 4
def test_verbose_has_not_installed_skggm_sdml(capsys):
    # Test that if users have not installed skggm, a message is printed
    # telling them scikit-learn's solver is used (when they use SDML)
    # TODO: remove if we don't need skggm anymore
    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
    y_pairs = [1, -1]
    sdml = SDML(verbose=True)
    sdml.fit(pairs, y_pairs)
    out, _ = capsys.readouterr()
    assert "SDML will use scikit-learn's graphical lasso solver." in out
Example No. 6
 def test_raises_no_warning_installed_skggm(self):
   # otherwise we should be able to instantiate and fit SDML and it
   # should raise no warning
   pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
   y_pairs = [1, -1]
   X, y = make_classification(random_state=42)
   with pytest.warns(None) as record:
     sdml = SDML()
     sdml.fit(pairs, y_pairs)
   assert len(record) == 0
   with pytest.warns(None) as record:
     sdml = SDML_Supervised(use_cov=False, balance_param=1e-5)
     sdml.fit(X, y)
   assert len(record) == 0
Example No. 8
  def test_sdml_raises_warning_msg_installed_skggm(self):
    """Tests that the right error message is raised if someone uses SDML
    with skggm installed and the algorithm fails to converge"""
    # TODO: remove if we don't need skggm anymore
    # case on which we know that skggm's graphical lasso fails
    # because it will return non finite values
    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
    y_pairs = [1, -1]
    sdml = SDML(use_cov=False, balance_param=100, verbose=True)

    msg = ("There was a problem in SDML when using skggm's graphical "
           "lasso solver.")
    with pytest.raises(RuntimeError) as raised_error:
      sdml.fit(pairs, y_pairs)
    assert msg == str(raised_error.value)
Example No. 9
 def test_raises_no_warning_installed_skggm(self):
   # otherwise we should be able to instantiate and fit SDML and it
   # should raise no error and no ConvergenceWarning
   pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
   y_pairs = [1, -1]
   X, y = make_classification(random_state=42)
   with pytest.warns(None) as records:
     sdml = SDML(prior='covariance')
     sdml.fit(pairs, y_pairs)
   for record in records:
     assert record.category is not ConvergenceWarning
   with pytest.warns(None) as records:
     sdml_supervised = SDML_Supervised(prior='identity', balance_param=1e-5)
     sdml_supervised.fit(X, y)
   for record in records:
     assert record.category is not ConvergenceWarning
Example No. 10
  def test_sdml_raises_warning_msg_installed_skggm(self):
    """Tests that the right warning message is raised if someone tries to
    use SDML but has not installed skggm, and that the algorithm fails to
    converge"""
    # TODO: remove if we don't need skggm anymore
    # case on which we know that skggm's graphical lasso fails
    # because it will return non finite values
    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
    y_pairs = [1, -1]
    sdml = SDML(use_cov=False, balance_param=100, verbose=True)

    msg = ("There was a problem in SDML when using skggm's graphical "
           "lasso solver.")
    with pytest.raises(RuntimeError) as raised_error:
      sdml.fit(pairs, y_pairs)
    assert msg == str(raised_error.value)
Example No. 11
  def test_sdml_raises_warning_non_psd(self):
    """Tests that SDML raises a warning on a toy example where we know the
    pseudo-covariance matrix is not PSD"""
    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
    y = [1, -1]
    sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5)
    msg = ("Warning, the input matrix of graphical lasso is not "
           "positive semi-definite (PSD). The algorithm may diverge, "
           "and lead to degenerate solutions. "
           "To prevent that, try to decrease the balance parameter "
           "`balance_param` and/or to set use_cov=False.")
    with pytest.warns(ConvergenceWarning) as raised_warning:
      try:
        sdml.fit(pairs, y)
      except Exception:
        pass
    # we assert that this warning is among the warnings raised by the
    # estimator
    assert msg in list(map(lambda w: str(w.message), raised_warning))
Example No. 12
  def test_sdml_raises_warning_msg_not_installed_skggm(self):
    """Tests that the right warning message is raised if someone tries to
    use SDML but has not installed skggm, and that the algorithm fails to
    converge"""
    # TODO: remove if we don't need skggm anymore
    # case on which we know that scikit-learn's graphical lasso fails
    # because it will return a non SPD matrix
    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
    y_pairs = [1, -1]
    sdml = SDML(prior='identity', balance_param=100, verbose=True)

    msg = ("There was a problem in SDML when using scikit-learn's graphical "
           "lasso solver. skggm's graphical lasso can sometimes converge on "
           "non SPD cases where scikit-learn's graphical lasso fails to "
           "converge. Try to install skggm and rerun the algorithm (see "
           "the README.md for the right version of skggm).")
    with pytest.raises(RuntimeError) as raised_error:
      sdml.fit(pairs, y_pairs)
    assert msg == str(raised_error.value)
Example No. 13
 def test_sdml_raises_warning_non_psd(self):
   """Tests that SDML raises a warning on a toy example where we know the
   pseudo-covariance matrix is not PSD"""
   pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
   y = [1, -1]
   sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5)
   msg = ("Warning, the input matrix of graphical lasso is not "
          "positive semi-definite (PSD). The algorithm may diverge, "
          "and lead to degenerate solutions. "
          "To prevent that, try to decrease the balance parameter "
          "`balance_param` and/or to set use_cov=False.")
   with pytest.warns(ConvergenceWarning) as raised_warning:
     try:
       sdml.fit(pairs, y)
     except Exception:
       pass
    # we assert that this warning is among the warnings raised by the
    # estimator
   assert msg in list(map(lambda w: str(w.message), raised_warning))
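
The warning text above already points at the remedy. As a rough sketch (the parameter values are illustrative, not taken from the test suite), the same toy pairs generally fit without the PSD warning once the balance parameter is lowered and the identity prior is used (the tests above use the older use_cov argument; the sketch below uses the newer prior='identity' equivalent):

import numpy as np
from metric_learn import SDML

pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60.]]])
y = [1, -1]

# An identity prior plus a small balance_param keeps the input of the
# graphical lasso close to the identity matrix, hence PSD.
sdml = SDML(prior='identity', sparsity_param=0.01, balance_param=1e-5)
sdml.fit(pairs, y)
print(sdml.get_mahalanobis_matrix())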
Example No. 14
  def test_sdml_raises_warning_msg_not_installed_skggm(self):
    """Tests that the right warning message is raised if someone tries to
    use SDML but has not installed skggm, and that the algorithm fails to
    converge"""
    # TODO: remove if we don't need skggm anymore
    # case on which we know that scikit-learn's graphical lasso fails
    # because it will return a non SPD matrix
    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
    y_pairs = [1, -1]
    sdml = SDML(use_cov=False, balance_param=100, verbose=True)

    msg = ("There was a problem in SDML when using scikit-learn's graphical "
           "lasso solver. skggm's graphical lasso can sometimes converge on "
           "non SPD cases where scikit-learn's graphical lasso fails to "
           "converge. Try to install skggm and rerun the algorithm (see "
           "the README.md for the right version of skggm).")
    with pytest.raises(RuntimeError) as raised_error:
      sdml.fit(pairs, y_pairs)
    assert msg == str(raised_error.value)
def test_tiwafer():
    # Note: this snippet targets an older metric-learn API, where SDML was
    # fit on (X, W) with a constraint matrix and exposed metric() and
    # transformer(); newer releases fit on labeled pairs and expose
    # get_mahalanobis_matrix() and components_ instead.
    num_constraints = 1500
    print("Loading Data....")
    tiwafer_data = load_data_sdml()
    sim_pairs = tiwafer_data.sim_pairs
    diff_pairs = tiwafer_data.diff_pairs
    sorted_ids = tiwafer_data.sortedIds
    ti_data = np.array(tiwafer_data.data)
    labels = np.array(tiwafer_data.target)

    print("Done Loading Data.\nLearning Distance Metric....")

    num_points = len(sorted_ids)
    W = prepare_constraints_old(labels, num_points, num_constraints)

    sdml = SDML()
    # W = prepare_constraints(sorted_ids, sim_pairs, diff_pairs)

    sdml.fit(ti_data, W)
    W_metric = sdml.metric()
    with open('W_metric_sdml.p', 'wb') as handle:
        pickle.dump(W_metric, handle)
    W_trans = sdml.transformer()
    with open('W_trans_sdml.p', 'wb') as handle:
        pickle.dump(W_trans, handle)
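
For readers on a current metric-learn release, a rough, hypothetical equivalent of the metric-learning part of test_tiwafer would go through the supervised wrapper instead of a hand-built constraint matrix. The data below is a random placeholder standing in for the wafer features and labels, and the pickle filename is only illustrative:

import pickle
import numpy as np
from metric_learn import SDML_Supervised

# Placeholder data standing in for the wafer features and labels.
rng = np.random.RandomState(42)
X = rng.randn(100, 5)
y = rng.randint(0, 2, size=100)

# SDML_Supervised samples similar/dissimilar constraints from the labels
# itself; identity prior and a small balance_param as in the tests above.
sdml = SDML_Supervised(prior='identity', balance_param=1e-5)
sdml.fit(X, y)

# Mahalanobis matrix and learned linear transformation in the current API.
W_metric = sdml.get_mahalanobis_matrix()
W_trans = sdml.components_
with open('W_metric_sdml.p', 'wb') as handle:
    pickle.dump(W_metric, handle)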