import numpy as np
import pandas as pd
from metric_learn import MMC


def learn_mmc_metric(X_test):
    mmc_dict = dict()

    for respondent_id in range(1, 21):

        y_test = pd.DataFrame(
            pd.read_pickle(r'../data/HCON/HCON_long_lik.pkl')
            [respondent_id]).values.reshape(-1, 1)

        # Boolean mask: True wherever two ratings agree
        mask = (y_test[None] == y_test[:, None])[:, :, 0]
        a, b = np.nonzero(np.triu(mask, k=1))   # indices of similar pairs
        c, d = np.nonzero(np.triu(~mask, k=1))  # indices of dissimilar pairs
        mmc = MMC(convergence_threshold=0.001)

        try:
            mmc.fit(X_test.values, (a, b, c, d))
            # Transforming the identity matrix recovers the learned linear
            # transformation; the Mahalanobis matrix follows as M = L @ L.T.
            L = mmc.transform(np.diag(np.ones(9)))
            M = np.dot(L, L.T)
        except ValueError:
            # Fitting should normally converge; a ValueError here means the
            # input produced degenerate constraints, so fall back to a scaled
            # identity metric.
            print(
                'R%d has no non-trivial dissimilarity constraints given for MMC.'
                % respondent_id)
            M = 0.01 * np.diag(np.ones(9))

        mmc_dict['R%d' % respondent_id] = M * 100
        print('R:%2d' % respondent_id, ' First Row of MMC Mahalanobis Matrix:',
              (M[0] * 100).round(3))

    return mmc_dict
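
The mask/np.triu idiom above converts a label vector into similarity and
dissimilarity index pairs; a minimal standalone sketch with a made-up label
array (the values below are illustrative only):

import numpy as np

labels = np.array([0, 0, 1, 1])
mask = labels[None] == labels[:, None]   # True where labels agree
a, b = np.nonzero(np.triu(mask, k=1))    # similar pairs above the diagonal
c, d = np.nonzero(np.triu(~mask, k=1))   # dissimilar pairs above the diagonal
print(np.column_stack([a, b]))  # [[0 1], [2 3]] -> similar pairs
print(np.column_stack([c, d]))  # [[0 2], [0 3], [1 2], [1 3]] -> dissimilar pairs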
Example #2
    def fit(self, X, y=None, ml=(), cl=()):
        X_transformed = X

        if ml and cl:
            # ml_graph, cl_graph, _ = preprocess_constraints(ml, cl, X.shape[0])
            #
            # ml, cl = [], []
            # for i, constraints in ml_graph.items():
            #     for j in constraints:
            #         ml.append((i, j))
            #
            # for i, constraints in cl_graph.items():
            #     for j in constraints:
            #         cl.append((i, j))

            # Pack the must-link / cannot-link pair lists into the four
            # index arrays [a, b, c, d] that MMC's constraint format expects.
            constraints = [np.array(lst) for lst in [*zip(*ml), *zip(*cl)]]
            mmc = MMC(diagonal=self.diagonal)
            mmc.fit(X, constraints=constraints)
            X_transformed = mmc.transform(X)

        kmeans = KMeans(n_clusters=self.n_clusters, init='random', max_iter=self.max_iter)
        kmeans.fit(X_transformed)

        self.labels_ = kmeans.labels_

        return self
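
The one-liner that builds constraints is dense; here is the same packing run
on toy pairs, showing the [a, b, c, d] layout it produces:

import numpy as np

ml = [(0, 1), (2, 3)]  # must-link (similar) pairs
cl = [(0, 4), (1, 5)]  # cannot-link (dissimilar) pairs
constraints = [np.array(lst) for lst in [*zip(*ml), *zip(*cl)]]
print(constraints)
# [array([0, 2]), array([1, 3]), array([0, 1]), array([4, 5])]
# i.e. a = first elements of ml, b = second elements of ml,
#      c = first elements of cl, d = second elements of cl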
Example #3
def runMMC(pairs, y):
    """Learn an MMC (Mahalanobis Metric for Clustering) model from labeled pairs."""
    from metric_learn import MMC

    mmc = MMC()
    mmc.fit(pairs, y)  # learn the MMC model from (+1 similar / -1 dissimilar) pairs
    print("Mahalanobis Matrix : ", mmc.get_mahalanobis_matrix())
Example #4
    def fit(self, X, y=None, constraints=None):
        mmc = MMC(diagonal=self.diagonal)
        mmc.fit(X, constraints=constraints)
        X_transformed = mmc.transform(X)
        kmeans = KMeans(n_clusters=self.n_clusters,
                        init='random',
                        max_iter=self.max_iter)
        kmeans.fit(X_transformed)
        self.labels_ = kmeans.labels_
        return self
Example #5
import gzip
import pickle
from statistics import mean, stdev

import numpy as np
from metric_learn import MMC


def main(args):
    print("Deriving similar/dissimilar constraints for metric learning.")
    with gzip.open(args.transfer_acc, "rb") as fr:
        # transfer_acc[tgt][src]: accuracy of the src->tgt transfer
        transfer_acc = pickle.load(fr)
    _mean = {
        l: mean(list(transfer_acc[l].values()))
        for l in transfer_acc.keys()
    }
    _std = {
        l: stdev(list(transfer_acc[l].values()))
        for l in transfer_acc.keys()
    }

    alpha = 0.5
    sim_pairs = []
    dissim_pairs = []

    meta_langs = list(transfer_acc.keys())
    for i in range(len(meta_langs)):
        for j in range(i + 1, len(meta_langs)):
            l1 = meta_langs[i]
            l2 = meta_langs[j]
            if transfer_acc[l1][l2] > _mean[l1] + alpha * _std[l1] and \
               transfer_acc[l2][l1] > _mean[l2] + alpha * _std[l2]:
                sim_pairs.append([l1, l2])
            elif transfer_acc[l1][l2] < _mean[l1] - alpha * _std[l1] and \
                 transfer_acc[l2][l1] < _mean[l2] - alpha * _std[l2]:
                dissim_pairs.append([l1, l2])

    # constraints: [simA, simB, dissimA, dissimB]
    constraints = list(zip(*sim_pairs)) + list(zip(*dissim_pairs))
    constraints = [
        np.array([meta_langs.index(lang) for lang in lst])
        for lst in constraints
    ]

    print("Mahalanobis metric learning.")
    with gzip.open(args.feature_path, "rb") as fr:
        typology_vec = pickle.load(fr)
    meta_X = np.array([typology_vec[l] for l in meta_langs])
    mmc = MMC()
    mmc.fit(meta_X, constraints)

    print("Apply the learned metric to the full typology vector space.")
    all_langs = list(typology_vec.keys())
    X = np.array([typology_vec[l] for l in all_langs])
    X = mmc.transform(X).tolist()
    typology_vec_transformed = {
        all_langs[i]: X[i]
        for i in range(len(all_langs))
    }

    with gzip.open(args.output_file, "wb") as fw:
        pickle.dump(typology_vec_transformed, fw)
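
A plausible command-line wrapper for main; the attribute names match what the
function reads (transfer_acc, feature_path, output_file), but the flag
spellings and help strings are assumptions:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Learn an MMC metric over language typology vectors.")
    parser.add_argument("--transfer_acc", required=True,
                        help="gzipped pickle of src->tgt transfer accuracies")
    parser.add_argument("--feature_path", required=True,
                        help="gzipped pickle mapping language -> typology vector")
    parser.add_argument("--output_file", required=True,
                        help="output path for the transformed vectors")
    main(parser.parse_args())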
Example #6
    def test_iris(self):

        # Generate full set of constraints for comparison with reference implementation
        mask = (self.iris_labels[None] == self.iris_labels[:, None])
        a, b = np.nonzero(np.triu(mask, k=1))
        c, d = np.nonzero(np.triu(~mask, k=1))

        # Full metric
        mmc = MMC(convergence_threshold=0.01)
        mmc.fit(self.iris_points, [a, b, c, d])
        expected = [[0.000514, 0.000868, -0.001195, -0.001703],
                    [0.000868, 0.001468, -0.002021, -0.002879],
                    [-0.001195, -0.002021, 0.002782, 0.003964],
                    [-0.001703, -0.002879, 0.003964, 0.005648]]
        assert_array_almost_equal(expected, mmc.metric(), decimal=6)

        # Diagonal metric
        mmc = MMC(diagonal=True)
        mmc.fit(self.iris_points, [a, b, c, d])
        expected = [0, 0, 1.210220, 1.228596]

        assert_array_almost_equal(np.diag(expected), mmc.metric(), decimal=6)

        # Supervised Full
        mmc = MMC_Supervised()
        mmc.fit(self.iris_points, self.iris_labels)
        csep = class_separation(mmc.transform(), self.iris_labels)
        self.assertLess(csep, 0.15)

        # Supervised Diagonal
        mmc = MMC_Supervised(diagonal=True)
        mmc.fit(self.iris_points, self.iris_labels)
        csep = class_separation(mmc.transform(), self.iris_labels)
        self.assertLess(csep, 0.2)
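
class_separation is a helper from metric-learn's test suite; a sketch of what
such a score might compute (a reconstruction, not the library's verbatim
source): the average ratio of within-class to between-class distances, so
lower values mean better separation.

import numpy as np
from sklearn.metrics import pairwise_distances

def class_separation_sketch(X, labels):
    unique_labels, label_inds = np.unique(labels, return_inverse=True)
    ratio = 0.0
    for li in range(len(unique_labels)):
        Xc = X[label_inds == li]    # points in class li
        Xnc = X[label_inds != li]   # points in every other class
        ratio += pairwise_distances(Xc).mean() / pairwise_distances(Xc, Xnc).mean()
    return ratio / len(unique_labels)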
Example #7
    def test_iris(self):

        # Generate full set of constraints for comparison with reference implementation
        mask = (self.iris_labels[None] == self.iris_labels[:, None])
        a, b = np.nonzero(np.triu(mask, k=1))
        c, d = np.nonzero(np.triu(~mask, k=1))

        # Full metric
        mmc = MMC(convergence_threshold=0.01)
        mmc.fit(self.iris_points, [a, b, c, d])
        expected = [[+0.00046504, +0.00083371, -0.00111959, -0.00165265],
                    [+0.00083371, +0.00149466, -0.00200719, -0.00296284],
                    [-0.00111959, -0.00200719, +0.00269546, +0.00397881],
                    [-0.00165265, -0.00296284, +0.00397881, +0.00587320]]
        assert_array_almost_equal(expected, mmc.metric(), decimal=6)

        # Diagonal metric
        mmc = MMC(diagonal=True)
        mmc.fit(self.iris_points, [a, b, c, d])
        expected = [0, 0, 1.21045968, 1.22552608]
        assert_array_almost_equal(np.diag(expected), mmc.metric(), decimal=6)

        # Supervised Full
        mmc = MMC_Supervised()
        mmc.fit(self.iris_points, self.iris_labels)
        csep = class_separation(mmc.transform(), self.iris_labels)
        self.assertLess(csep, 0.15)

        # Supervised Diagonal
        mmc = MMC_Supervised(diagonal=True)
        mmc.fit(self.iris_points, self.iris_labels)
        csep = class_separation(mmc.transform(), self.iris_labels)
        self.assertLess(csep, 0.2)
Example #9
import numpy as np
from types import SimpleNamespace

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from metric_learn import MMC


def learn_distance_metric(distances, pairs_per_prototype=100,
                          test_size=0.5,
                          return_features=False,
                          return_pairs=False):
    # DateFeatureTransformer, MMCFeatureTransformer and create_mmc_pairs are
    # project-specific helpers assumed to be defined elsewhere in this codebase.
    feature_pipeline = Pipeline([
        ('dates', DateFeatureTransformer()),
        ('features', MMCFeatureTransformer()),
    ])
    
    features = feature_pipeline.fit_transform(distances)
    pairs = create_mmc_pairs(distances, pairs_per_prototype=pairs_per_prototype)
    
    X_train, X_test, y_train, y_test = train_test_split(
        pairs[:, :2], pairs[:, -1],
        shuffle=True, stratify=pairs[:, -1], test_size=test_size
    )

    mmc = MMC(preprocessor=np.array(features, dtype=float))
    mmc = mmc.fit(X_train, y_train)
    score = f1_score(y_test, mmc.predict(X_test), average='weighted')
    return SimpleNamespace(
        score=score,
        metric_components=mmc.components_.transpose(),
        features=None if not return_features else features,
        pairs=None if not return_pairs else pairs 
    )
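
A hypothetical call, assuming distances is the DataFrame of pairwise records
the project's helpers expect (the variable name is illustrative):

result = learn_distance_metric(distances, pairs_per_prototype=50,
                               return_pairs=True)
print("weighted F1 on held-out pairs:", result.score)
print("metric components shape:", result.metric_components.shape)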
Example #10
    def test_iris(self):

        # Generate full set of constraints for comparison with reference
        # implementation
        mask = self.iris_labels[None] == self.iris_labels[:, None]
        a, b = np.nonzero(np.triu(mask, k=1))
        c, d = np.nonzero(np.triu(~mask, k=1))

        # Full metric
        n_features = self.iris_points.shape[1]
        mmc = MMC(convergence_threshold=0.01, init=np.eye(n_features) / 10)
        mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d]))
        expected = [[+0.000514, +0.000868, -0.001195, -0.001703],
                    [+0.000868, +0.001468, -0.002021, -0.002879],
                    [-0.001195, -0.002021, +0.002782, +0.003964],
                    [-0.001703, -0.002879, +0.003964, +0.005648]]
        assert_array_almost_equal(expected,
                                  mmc.get_mahalanobis_matrix(),
                                  decimal=6)

        # Diagonal metric
        mmc = MMC(diagonal=True)
        mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d]))
        expected = [0, 0, 1.210220, 1.228596]
        assert_array_almost_equal(np.diag(expected),
                                  mmc.get_mahalanobis_matrix(),
                                  decimal=6)

        # Supervised Full
        mmc = MMC_Supervised()
        mmc.fit(self.iris_points, self.iris_labels)
        csep = class_separation(mmc.transform(self.iris_points),
                                self.iris_labels)
        self.assertLess(csep, 0.15)

        # Supervised Diagonal
        mmc = MMC_Supervised(diagonal=True)
        mmc.fit(self.iris_points, self.iris_labels)
        csep = class_separation(mmc.transform(self.iris_points),
                                self.iris_labels)
        self.assertLess(csep, 0.2)
Example #11
  def test_iris(self):

    # Generate full set of constraints for comparison with reference implementation
    mask = (self.iris_labels[None] == self.iris_labels[:, None])
    a, b = np.nonzero(np.triu(mask, k=1))
    c, d = np.nonzero(np.triu(~mask, k=1))

    # Full metric
    mmc = MMC(convergence_threshold=0.01)
    mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d]))
    expected = [[+0.000514, +0.000868, -0.001195, -0.001703],
                [+0.000868, +0.001468, -0.002021, -0.002879],
                [-0.001195, -0.002021, +0.002782, +0.003964],
                [-0.001703, -0.002879, +0.003964, +0.005648]]
    assert_array_almost_equal(expected, mmc.get_mahalanobis_matrix(),
                              decimal=6)

    # Diagonal metric
    mmc = MMC(diagonal=True)
    mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d]))
    expected = [0, 0, 1.210220, 1.228596]
    assert_array_almost_equal(np.diag(expected), mmc.get_mahalanobis_matrix(),
                              decimal=6)

    # Supervised Full
    mmc = MMC_Supervised()
    mmc.fit(self.iris_points, self.iris_labels)
    csep = class_separation(mmc.transform(self.iris_points), self.iris_labels)
    self.assertLess(csep, 0.15)
    
    # Supervised Diagonal
    mmc = MMC_Supervised(diagonal=True)
    mmc.fit(self.iris_points, self.iris_labels)
    csep = class_separation(mmc.transform(self.iris_points), self.iris_labels)
    self.assertLess(csep, 0.2)
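
wrap_pairs is a helper these tests use to convert the legacy [a, b, c, d]
index arrays into the (pairs, y) format that newer metric-learn versions
expect; a sketch of what it does (a reconstruction, not the library's verbatim
source):

import numpy as np

def wrap_pairs_sketch(X, constraints):
    a, b, c, d = (np.asarray(idx) for idx in constraints)
    # Stack similar (a, b) and dissimilar (c, d) index pairs vertically...
    pair_inds = np.vstack([np.column_stack([a, b]), np.column_stack([c, d])])
    # ...and label them +1 (similar) / -1 (dissimilar) respectively.
    y = np.concatenate([np.ones_like(a), -np.ones_like(c)])
    pairs = X[pair_inds]  # shape: (n_pairs, 2, n_features)
    return pairs, y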
"""

from metric_learn import MMC

pairs = [[[1.2, 7.5], [1.3, 1.5]], [[6.4, 2.6], [6.2, 9.7]],
         [[1.3, 4.5], [3.2, 4.6]], [[6.2, 5.5], [5.4, 5.4]]]

# In this task we want pairs whose first features are close to be treated as
# similar, no matter how close their second features are.

y = [1, 1, -1, -1]
"""
Learn an MMC (Mahalanobis Metric for Clustering) model
"""
mmc = MMC()
mmc.fit(pairs, y)  # learn the MMC model
"""
Compute the decision scores used to classify the pairs
"""
print("debug 1: ", mmc.decision_function(pairs))
"""
Returns a copy of the Mahalanobis matrix learned by the metric learner
"""
print("debug 2: ", mmc.get_mahalanobis_matrix())
"""
Returns a function that takes as input two 1D arrays and outputs the learned metric score on these two points.
"""
f = mmc.get_metric()
print("debug 3: ", f)
"""
Predicts the learned metric between input pairs