def test_iris(self):
    """Exercise MMC and MMC_Supervised on the iris fixture.

    The pairwise learners are compared against hard-coded metric matrices;
    the supervised learners only need a class-separation score below a
    fixed bound.
    """
    labels = self.iris_labels

    # Generate full set of constraints for comparison with reference
    # implementation. np.triu(..., k=1) keeps only the strict upper
    # triangle, so each unordered pair of points is listed once.
    # NOTE(review): assumes iris_labels is a 1-D array -- confirm in setUp.
    same_label = (labels[None] == labels[:, None])
    sim_i, sim_j = np.nonzero(np.triu(same_label, k=1))
    dis_i, dis_j = np.nonzero(np.triu(~same_label, k=1))

    # Full metric
    full_mmc = MMC(convergence_threshold=0.01)
    full_mmc.fit(self.iris_points, [sim_i, sim_j, dis_i, dis_j])
    expected_full = [
        [0.00046504, 0.00083371, -0.00111959, -0.00165265],
        [0.00083371, 0.00149466, -0.00200719, -0.00296284],
        [-0.00111959, -0.00200719, 0.00269546, 0.00397881],
        [-0.00165265, -0.00296284, 0.00397881, 0.00587320],
    ]
    assert_array_almost_equal(expected_full, full_mmc.metric(), decimal=6)

    # Diagonal metric
    diag_mmc = MMC(diagonal=True)
    diag_mmc.fit(self.iris_points, [sim_i, sim_j, dis_i, dis_j])
    expected_diag = np.diag([0, 0, 1.21045968, 1.22552608])
    assert_array_almost_equal(expected_diag, diag_mmc.metric(), decimal=6)

    # Supervised Full
    sup_full = MMC_Supervised()
    sup_full.fit(self.iris_points, self.iris_labels)
    full_sep = class_separation(sup_full.transform(), self.iris_labels)
    self.assertLess(full_sep, 0.15)

    # Supervised Diagonal
    sup_diag = MMC_Supervised(diagonal=True)
    sup_diag.fit(self.iris_points, self.iris_labels)
    diag_sep = class_separation(sup_diag.transform(), self.iris_labels)
    self.assertLess(diag_sep, 0.2)
Exemple #2
0
    def test_iris(self):
        """Check MMC and MMC_Supervised against the iris fixture.

        The pairwise variants must reproduce the hard-coded metric matrices
        to six decimals; the supervised variants must keep the
        class-separation score under a fixed threshold.
        """

        # Generate full set of constraints for comparison with reference
        # implementation: (a, b) index the same-label pairs and (c, d) the
        # different-label pairs, each taken from the strict upper triangle.
        mask = (self.iris_labels[None] == self.iris_labels[:, None])
        a, b = np.nonzero(np.triu(mask, k=1))
        c, d = np.nonzero(np.triu(~mask, k=1))

        # Full metric
        mmc = MMC(convergence_threshold=0.01)
        mmc.fit(self.iris_points, [a, b, c, d])
        expected = [[0.000514, 0.000868, -0.001195, -0.001703],
                    [0.000868, 0.001468, -0.002021, -0.002879],
                    [-0.001195, -0.002021, 0.002782, 0.003964],
                    [-0.001703, -0.002879, 0.003964, 0.005648]]
        assert_array_almost_equal(expected, mmc.metric(), decimal=6)

        # Diagonal metric
        mmc = MMC(diagonal=True)
        mmc.fit(self.iris_points, [a, b, c, d])
        expected = [0, 0, 1.210220, 1.228596]
        assert_array_almost_equal(np.diag(expected), mmc.metric(), decimal=6)

        # Supervised Full
        mmc = MMC_Supervised()
        mmc.fit(self.iris_points, self.iris_labels)
        csep = class_separation(mmc.transform(), self.iris_labels)
        self.assertLess(csep, 0.15)

        # Supervised Diagonal
        mmc = MMC_Supervised(diagonal=True)
        mmc.fit(self.iris_points, self.iris_labels)
        csep = class_separation(mmc.transform(), self.iris_labels)
        self.assertLess(csep, 0.2)
Exemple #3
0
    def test_iris(self):
        """Run the pairwise and supervised MMC learners on the iris fixture.

        Pairwise learners are checked against hard-coded metric matrices,
        supervised learners against an upper bound on class separation.
        """
        # Generate full set of constraints for comparison with reference
        # implementation. The strict upper triangle (k=1) lists every
        # unordered pair of samples exactly once.
        labels_match = (self.iris_labels[None] == self.iris_labels[:, None])
        pos_rows, pos_cols = np.nonzero(np.triu(labels_match, k=1))
        neg_rows, neg_cols = np.nonzero(np.triu(~labels_match, k=1))

        # Full metric
        learner = MMC(convergence_threshold=0.01)
        learner.fit(self.iris_points, [pos_rows, pos_cols, neg_rows, neg_cols])
        reference_full = [
            [+0.00046504, +0.00083371, -0.00111959, -0.00165265],
            [+0.00083371, +0.00149466, -0.00200719, -0.00296284],
            [-0.00111959, -0.00200719, +0.00269546, +0.00397881],
            [-0.00165265, -0.00296284, +0.00397881, +0.00587320],
        ]
        assert_array_almost_equal(reference_full, learner.metric(), decimal=6)

        # Diagonal metric
        learner = MMC(diagonal=True)
        learner.fit(self.iris_points, [pos_rows, pos_cols, neg_rows, neg_cols])
        reference_diag = [0, 0, 1.21045968, 1.22552608]
        assert_array_almost_equal(np.diag(reference_diag), learner.metric(),
                                  decimal=6)

        # Supervised Full (no constructor arguments), then Supervised
        # Diagonal; each has its own class-separation bound.
        supervised_cases = (
            ({}, 0.15),
            ({'diagonal': True}, 0.2),
        )
        for ctor_kwargs, max_separation in supervised_cases:
            supervised = MMC_Supervised(**ctor_kwargs)
            supervised.fit(self.iris_points, self.iris_labels)
            separation = class_separation(supervised.transform(),
                                          self.iris_labels)
            self.assertLess(separation, max_separation)
def learn_mmc_metric(X_test):
    """Learn one MMC Mahalanobis matrix per respondent (ids 1..20).

    For every respondent the labels are read from the HCON pickle, turned
    into same-label / different-label index pairs, and used to fit an MMC
    model on ``X_test.values``.

    Args:
        X_test: object exposing ``.values`` (the feature matrix handed to
            ``MMC.fit``).
            NOTE(review): the hard-coded 9 below presumably equals the
            number of feature columns -- confirm against the caller.

    Returns:
        dict mapping ``'R<id>'`` to the learned 9x9 matrix scaled by 100.
        When fitting raises ``ValueError`` the entry is the identity matrix
        (``0.01 * I`` scaled by 100).
    """
    # The pickle does not depend on the respondent, so load it once instead
    # of re-reading it from disk on every loop iteration.
    all_labels = pd.read_pickle(r'../data/HCON/HCON_long_lik.pkl')

    mmc_dict = {}

    for respondent_id in range(1, 21):

        y_test = pd.DataFrame(all_labels[respondent_id]).values.reshape(-1, 1)

        # y_test is (n, 1); broadcasting gives an (n, n, 1) comparison whose
        # last axis is dropped to obtain the (n, n) same-label mask.
        mask = (y_test[None] == y_test[:, None])[:, :, 0]
        a, b = np.nonzero(np.triu(mask, k=1))  # similarity pairs
        c, d = np.nonzero(np.triu(~mask, k=1))  # dissimilarity pairs
        mmc = MMC(convergence_threshold=0.001)

        try:
            mmc.fit(X_test.values, (a, b, c, d))
            # Transform the identity matrix and form L @ L.T from the result
            # to obtain the matrix reported below.
            L = mmc.transform(np.eye(9))
            M = np.dot(L, L.T)
        except ValueError:
            # it should be converged anyway,
            # if the ValueError happens, there is some bad patterns of the input
            # NOTE(review): every ValueError from fit/transform is reported
            # with the message below, whatever its actual cause.
            print(
                'R%d has no non-trivial dissimilarity constraints given for MMC.'
                % respondent_id)
            M = 0.01 * np.eye(9)

        mmc_dict['R%d' % respondent_id] = M * 100
        print('R:%2d' % respondent_id, ' First Row of MMC Mahalanobis Matrix:',
              (M[0] * 100).round(3))

    return mmc_dict
Exemple #5
0
    def fit(self, X, y=None, ml=None, cl=None):
        """Cluster ``X`` with KMeans, optionally after learning an MMC metric.

        Args:
            X: data passed to ``MMC.fit`` / ``KMeans.fit``.
            y: unused by this method.
            ml: must-link constraints; ``None`` (default) or empty disables
                metric learning.
                NOTE(review): presumably a list of ``(i, j)`` index pairs --
                confirm against callers.
            cl: cannot-link constraints, same format as ``ml``.

        Returns:
            ``self``, with ``labels_`` set to the KMeans labels.
        """
        X_transformed = X

        # The metric is learned only when BOTH constraint collections are
        # non-empty; otherwise KMeans runs on the untransformed data.
        if ml and cl:
            # Transpose the constraint entries into separate index arrays,
            # must-link arrays first, then cannot-link arrays.
            constraints = [np.array(lst) for lst in [*zip(*ml), *zip(*cl)]]
            mmc = MMC(diagonal=self.diagonal)
            mmc.fit(X, constraints=constraints)
            X_transformed = mmc.transform(X)

        kmeans = KMeans(n_clusters=self.n_clusters, init='random', max_iter=self.max_iter)
        kmeans.fit(X_transformed)

        self.labels_ = kmeans.labels_

        return self
 def fit(self, X, y=None, constraints=None):
     """Learn an MMC metric on ``X``, then cluster the transformed data.

     ``constraints`` is forwarded unchanged to ``MMC.fit``; ``y`` is not
     used. Stores the KMeans labels in ``self.labels_`` and returns ``self``.
     """
     metric_learner = MMC(diagonal=self.diagonal)
     metric_learner.fit(X, constraints=constraints)
     embedded = metric_learner.transform(X)

     clusterer = KMeans(
         n_clusters=self.n_clusters, init='random', max_iter=self.max_iter)
     clusterer.fit(embedded)

     self.labels_ = clusterer.labels_
     return self
Exemple #7
0
def main(args):
    """Learn an MMC metric from transfer accuracies and apply it to typology vectors.

    Steps:
      1. Load ``args.transfer_acc`` (gzip pickle of a nested mapping; per the
         original comment ``transfer_acc[tgt][src]`` is the accuracy of
         src->tgt).
      2. Mark a language pair as similar when both directions are above
         ``mean + alpha * std`` of their row, dissimilar when both are below
         ``mean - alpha * std``.
      3. Fit ``MMC`` on the typology vectors (``args.feature_path``) of the
         languages in the accuracy table.
      4. Transform every typology vector and dump the result as a gzip
         pickle to ``args.output_file``.

    Raises:
        ValueError: if no similar pair or no dissimilar pair is found, since
            the four constraint arrays cannot be built in that case.
    """
    from itertools import combinations

    print("Deriving similar/dissimilar constraints for metric learning.")
    # NOTE: pickle.load can execute arbitrary code; only pass trusted files.
    with gzip.open(args.transfer_acc, "rb") as fr:
        # transer_acc[tgt][src]: accuracy of src->tgt
        transfer_acc = pickle.load(fr)
    _mean = {
        lang: mean(list(accs.values()))
        for lang, accs in transfer_acc.items()
    }
    _std = {
        lang: stdev(list(accs.values()))
        for lang, accs in transfer_acc.items()
    }

    alpha = 0.5
    sim_pairs = []
    dissim_pairs = []

    meta_langs = list(transfer_acc.keys())
    # Pairs are stored directly as positions in meta_langs (i < j), which is
    # what MMC consumes; this avoids a later name -> index lookup per entry.
    for (i, l1), (j, l2) in combinations(enumerate(meta_langs), 2):
        if (transfer_acc[l1][l2] > _mean[l1] + alpha * _std[l1]
                and transfer_acc[l2][l1] > _mean[l2] + alpha * _std[l2]):
            sim_pairs.append((i, j))
        elif (transfer_acc[l1][l2] < _mean[l1] - alpha * _std[l1]
                and transfer_acc[l2][l1] < _mean[l2] - alpha * _std[l2]):
            dissim_pairs.append((i, j))

    if not sim_pairs or not dissim_pairs:
        # zip(*[]) yields nothing, so fewer than four constraint arrays would
        # be produced; fail here with a message that names the cause.
        raise ValueError(
            "Cannot build MMC constraints: found %d similar and %d dissimilar "
            "language pairs; at least one of each is required."
            % (len(sim_pairs), len(dissim_pairs)))

    # constraints: [simA, simB, dissimA, dissimB]
    constraints = [
        np.array(idx)
        for idx in list(zip(*sim_pairs)) + list(zip(*dissim_pairs))
    ]

    print("Mahalanobis metric learning.")
    with gzip.open(args.feature_path, "rb") as fr:
        typology_vec = pickle.load(fr)
    meta_X = np.array([typology_vec[lang] for lang in meta_langs])
    mmc = MMC()
    mmc.fit(meta_X, constraints)

    print("Apply the learned metric to the full typology vector space.")
    all_langs = list(typology_vec.keys())
    X = np.array([typology_vec[lang] for lang in all_langs])
    X = mmc.transform(X).tolist()
    typology_vec_transformed = dict(zip(all_langs, X))

    with gzip.open(args.output_file, "wb") as fw:
        pickle.dump(typology_vec_transformed, fw)
Exemple #8
0
    def test_iris(self):
        """Validate MMC (pairs API) and MMC_Supervised on the iris fixture.

        The pair-based learners must reproduce hard-coded Mahalanobis
        matrices; the supervised learners must stay below a class-separation
        bound.
        """
        points = self.iris_points

        # Generate full set of constraints for comparison with reference
        # implementation; only the strict upper triangle is used so each
        # unordered pair appears once.
        same_class = self.iris_labels[None] == self.iris_labels[:, None]
        sim_a, sim_b = np.nonzero(np.triu(same_class, k=1))
        dis_a, dis_b = np.nonzero(np.triu(~same_class, k=1))

        # Full metric, started from the identity scaled by 1/10
        dim = points.shape[1]
        full_model = MMC(convergence_threshold=0.01, init=np.eye(dim) / 10)
        full_model.fit(*wrap_pairs(points, [sim_a, sim_b, dis_a, dis_b]))
        want_full = [
            [0.000514, 0.000868, -0.001195, -0.001703],
            [0.000868, 0.001468, -0.002021, -0.002879],
            [-0.001195, -0.002021, 0.002782, 0.003964],
            [-0.001703, -0.002879, 0.003964, 0.005648],
        ]
        assert_array_almost_equal(
            want_full, full_model.get_mahalanobis_matrix(), decimal=6)

        # Diagonal metric
        diag_model = MMC(diagonal=True)
        diag_model.fit(*wrap_pairs(points, [sim_a, sim_b, dis_a, dis_b]))
        want_diag = np.diag([0, 0, 1.210220, 1.228596])
        assert_array_almost_equal(
            want_diag, diag_model.get_mahalanobis_matrix(), decimal=6)

        # Supervised Full
        sup_full = MMC_Supervised()
        sup_full.fit(points, self.iris_labels)
        embedded_full = sup_full.transform(points)
        self.assertLess(
            class_separation(embedded_full, self.iris_labels), 0.15)

        # Supervised Diagonal
        sup_diag = MMC_Supervised(diagonal=True)
        sup_diag.fit(points, self.iris_labels)
        embedded_diag = sup_diag.transform(points)
        self.assertLess(
            class_separation(embedded_diag, self.iris_labels), 0.2)
  def test_iris(self):
    """Check MMC (pairs API) and MMC_Supervised against the iris fixture.

    The pair-based variants must reproduce the hard-coded Mahalanobis
    matrices to six decimals; the supervised variants must keep the
    class-separation score under a fixed threshold.
    """

    # Generate full set of constraints for comparison with reference
    # implementation: (a, b) index same-label pairs and (c, d) index
    # different-label pairs, each taken from the strict upper triangle.
    mask = (self.iris_labels[None] == self.iris_labels[:, None])
    a, b = np.nonzero(np.triu(mask, k=1))
    c, d = np.nonzero(np.triu(~mask, k=1))

    # Full metric
    mmc = MMC(convergence_threshold=0.01)
    mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d]))
    expected = [[+0.000514, +0.000868, -0.001195, -0.001703],
                [+0.000868, +0.001468, -0.002021, -0.002879],
                [-0.001195, -0.002021, +0.002782, +0.003964],
                [-0.001703, -0.002879, +0.003964, +0.005648]]
    assert_array_almost_equal(expected, mmc.get_mahalanobis_matrix(),
                              decimal=6)

    # Diagonal metric
    mmc = MMC(diagonal=True)
    mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d]))
    expected = [0, 0, 1.210220, 1.228596]
    assert_array_almost_equal(np.diag(expected), mmc.get_mahalanobis_matrix(),
                              decimal=6)

    # Supervised Full
    mmc = MMC_Supervised()
    mmc.fit(self.iris_points, self.iris_labels)
    csep = class_separation(mmc.transform(self.iris_points), self.iris_labels)
    self.assertLess(csep, 0.15)

    # Supervised Diagonal
    mmc = MMC_Supervised(diagonal=True)
    mmc.fit(self.iris_points, self.iris_labels)
    csep = class_separation(mmc.transform(self.iris_points), self.iris_labels)
    self.assertLess(csep, 0.2)