def learn_mmc_metric(X_test):
    mmc_dict = dict()

    for respondent_id in range(1, 21):

        y_test = pd.DataFrame(
            pd.read_pickle(r'../data/HCON/HCON_long_lik.pkl')
            [respondent_id]).values.reshape(-1, 1)

        mask = (y_test[None] == y_test[:, None])[:, :, 0]
        a, b = np.nonzero(np.triu(mask, k=1))  # similarity pairs
        c, d = np.nonzero(np.triu(~mask, k=1))  # dissimilarity pairs
        mmc = MMC(convergence_threshold=0.001)

        try:
            mmc.fit(X_test.values, (a, b, c, d))
            # recover the learned linear map L by transforming the identity;
            # the Mahalanobis matrix is then M = L L^T
            L = mmc.transform(np.diag(np.ones(9)))
            M = np.dot(L, L.T)
        except ValueError:
            # MMC should converge on well-formed input; a ValueError here
            # indicates degenerate constraints (e.g. no usable dissimilarity
            # pairs), so fall back to a scaled identity matrix below.
            print(
                'R%d has no non-trivial dissimilarity constraints given for MMC.'
                % respondent_id)
            M = 0.01 * np.diag(np.ones(9))

        mmc_dict['R%d' % respondent_id] = M * 100
        print('R:%2d' % respondent_id, ' First Row of MMC Mahalanobis Matrix:',
              (M[0] * 100).round(3))

    return mmc_dict
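
A note on the constraint construction in the function above (not part of the original snippet): the broadcasted equality mask combined with np.triu(k=1) enumerates every unordered index pair exactly once, splitting them into same-label pairs (a, b) and different-label pairs (c, d). A minimal self-contained sketch with toy labels:

import numpy as np

y = np.array([0, 0, 1, 1]).reshape(-1, 1)
mask = (y[None] == y[:, None])[:, :, 0]  # mask[i, j] is True iff y[i] == y[j]
a, b = np.nonzero(np.triu(mask, k=1))    # similar pairs: (0, 1), (2, 3)
c, d = np.nonzero(np.triu(~mask, k=1))   # dissimilar pairs: (0, 2), (0, 3), (1, 2), (1, 3)
print(list(zip(a, b)), list(zip(c, d)))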
Example #2
# Assumed imports (not shown in the original snippet):
#   from types import SimpleNamespace
#   import numpy as np
#   from sklearn.pipeline import Pipeline
#   from sklearn.model_selection import train_test_split
#   from sklearn.metrics import f1_score
#   from metric_learn import MMC
# DateFeatureTransformer, MMCFeatureTransformer and create_mmc_pairs are
# project-specific helpers defined elsewhere.
def learn_distance_metric(distances, pairs_per_prototype=100,
                          test_size=0.5,
                          return_features=False,
                          return_pairs=False):
    feature_pipeline = Pipeline([
        ('dates', DateFeatureTransformer()),
        ('features', MMCFeatureTransformer()),
    ])
    
    features = feature_pipeline.fit_transform(distances)
    pairs = create_mmc_pairs(distances, pairs_per_prototype=pairs_per_prototype)
    
    X_train, X_test, y_train, y_test = train_test_split(
        pairs[:, :2], pairs[:, -1],
        shuffle=True, stratify=pairs[:, -1], test_size=test_size
    )

    # NB: np.float was removed in NumPy 1.24; use the builtin float instead
    mmc = MMC(preprocessor=np.array(features, dtype=float))
    mmc = mmc.fit(X_train, y_train)
    score = f1_score(y_test, mmc.predict(X_test), average='weighted')
    return SimpleNamespace(
        score=score,
        metric_components=mmc.components_.transpose(),
        features=None if not return_features else features,
        pairs=None if not return_pairs else pairs 
    )
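
The snippet above relies on metric-learn's preprocessor mechanism: because MMC is given the feature matrix as preprocessor, fit() receives pairs of row indices rather than raw feature pairs. A minimal sketch with toy data (the names here are illustrative, not from the snippet):

import numpy as np
from metric_learn import MMC

features = np.array([[1.0, 2.0], [1.1, 2.1], [5.0, 0.0], [5.2, 0.1]])
index_pairs = np.array([[0, 1], [2, 3], [0, 2], [1, 3]])  # rows of `features`
y = np.array([1, 1, -1, -1])  # +1 = similar pair, -1 = dissimilar pair

mmc = MMC(preprocessor=features)
mmc.fit(index_pairs, y)
print(mmc.decision_function(index_pairs))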
Example #3
    def fit(self, X, y=None, ml=[], cl=[]):
        X_transformed = X

        if ml and cl:
            # ml_graph, cl_graph, _ = preprocess_constraints(ml, cl, X.shape[0])
            #
            # ml, cl = [], []
            # for i, constraints in ml_graph.items():
            #     for j in constraints:
            #         ml.append((i, j))
            #
            # for i, constraints in cl_graph.items():
            #     for j in constraints:
            #         cl.append((i, j))

            constraints = [np.array(lst) for lst in [*zip(*ml), *zip(*cl)]]
            mmc = MMC(diagonal=self.diagonal)
            mmc.fit(X, constraints=constraints)
            X_transformed = mmc.transform(X)

        kmeans = KMeans(n_clusters=self.n_clusters, init='random', max_iter=self.max_iter)
        kmeans.fit(X_transformed)

        self.labels_ = kmeans.labels_

        return self
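
The one-liner that builds constraints above unzips the must-link and cannot-link pair lists into the (a, b, c, d) index-array format expected by the older MMC API. On hypothetical constraint lists:

import numpy as np

ml = [(0, 1), (2, 3)]  # must-link index pairs
cl = [(0, 2)]          # cannot-link index pairs
constraints = [np.array(lst) for lst in [*zip(*ml), *zip(*cl)]]
print(constraints)  # [array([0, 2]), array([1, 3]), array([0]), array([2])], i.e. (a, b, c, d)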
Example #4
def runMMC():
    # Learn an MMC (Mahalanobis Metric for Clustering) model.
    # `pairs` and `y` are assumed to be module-level globals built
    # beforehand; Example #12 below shows how they are constructed.
    from metric_learn import MMC

    mmc = MMC()
    mmc.fit(pairs, y)  # learn the MMC model
    print("Mahalanobis Matrix : ", mmc.get_mahalanobis_matrix())
Example #5
    def fit(self, X, y=None, constraints=None):
        mmc = MMC(diagonal=self.diagonal)
        mmc.fit(X, constraints=constraints)
        X_transformed = mmc.transform(X)
        kmeans = KMeans(n_clusters=self.n_clusters,
                        init='random',
                        max_iter=self.max_iter)
        kmeans.fit(X_transformed)
        self.labels_ = kmeans.labels_
        return self
Example #6
def main(args):
    print("Deriving similar/dissimilar constraints for metric learning.")
    with gzip.open(args.transfer_acc, "rb") as fr:
        # transfer_acc[tgt][src]: accuracy of transferring src -> tgt
        transfer_acc = pickle.load(fr)
    _mean = {
        l: mean(list(transfer_acc[l].values()))
        for l in transfer_acc.keys()
    }
    _std = {
        l: stdev(list(transfer_acc[l].values()))
        for l in transfer_acc.keys()
    }

    alpha = 0.5
    sim_pairs = []
    dissim_pairs = []

    meta_langs = list(transfer_acc.keys())
    for i in range(len(meta_langs)):
        for j in range(i + 1, len(meta_langs)):
            l1 = meta_langs[i]
            l2 = meta_langs[j]
            if transfer_acc[l1][l2] > _mean[l1] + alpha * _std[l1] and \
               transfer_acc[l2][l1] > _mean[l2] + alpha * _std[l2]:
                sim_pairs.append([l1, l2])
            elif transfer_acc[l1][l2] < _mean[l1] - alpha * _std[l1] and \
                 transfer_acc[l2][l1] < _mean[l2] - alpha * _std[l2]:
                dissim_pairs.append([l1, l2])

    # constraints: [simA, simB, dissimA, dissimB]
    constraints = list(zip(*sim_pairs)) + list(zip(*dissim_pairs))
    constraints = [
        list(map(lambda l: meta_langs.index(l), lst)) for lst in constraints
    ]
    constraints = [np.array(x) for x in constraints]

    print("Mahalanobis metric learning.")
    with gzip.open(args.feature_path, "rb") as fr:
        typology_vec = pickle.load(fr)
    meta_X = np.array([typology_vec[l] for l in meta_langs])
    mmc = MMC()
    mmc.fit(meta_X, constraints)

    print("Apply the learned metric to the full typology vector space.")
    all_langs = list(typology_vec.keys())
    X = np.array([typology_vec[l] for l in all_langs])
    X = mmc.transform(X).tolist()
    typology_vec_transformed = {
        all_langs[i]: X[i]
        for i in range(len(all_langs))
    }

    with gzip.open(args.output_file, "wb") as fw:
        pickle.dump(typology_vec_transformed, fw)
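
The mutual-threshold rule above marks two languages as similar only when transfer accuracy exceeds the target-side mean plus alpha standard deviations in both directions. A worked check on hypothetical numbers:

alpha = 0.5
acc_l1_l2, mean_l1, std_l1 = 0.80, 0.60, 0.10  # transfer_acc[l1][l2]: l2 -> l1
acc_l2_l1, mean_l2, std_l2 = 0.75, 0.55, 0.20  # transfer_acc[l2][l1]: l1 -> l2
similar = (acc_l1_l2 > mean_l1 + alpha * std_l1) and \
          (acc_l2_l1 > mean_l2 + alpha * std_l2)
print(similar)  # True: 0.80 > 0.65 and 0.75 > 0.65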
Example #7
"""
MMC usage example; API reference:
http://contrib.scikit-learn.org/metric-learn/generated/metric_learn.MMC.html#metric_learn.MMC
"""

from metric_learn import MMC

pairs = [[[1.2, 7.5], [1.3, 1.5]], [[6.4, 2.6], [6.2, 9.7]],
         [[1.3, 4.5], [3.2, 4.6]], [[6.2, 5.5], [5.4, 5.4]]]

# in this task we want points whose first features are close to end up
# close under the learned metric, regardless of the second feature

y = [1, 1, -1, -1]
"""
Learn MMC (Mahalanobis Metrics for Clustering) Model 
"""
mmc = MMC()
mmc.fit(pairs, y)  # learn the MMC model
"""
Return the decision function used to classify the pairs
"""
print("debug 1: ", mmc.decision_function(pairs))
"""
Returns a copy of the Mahalanobis matrix learned by the metric learner
"""
print("debug 2: ", mmc.get_mahalanobis_matrix())
"""
Returns a function that takes as input two 1D arrays and outputs the learned metric score on these two points.
"""
f = mmc.get_metric()
print("debug 3: ", f)
"""
Example #8
    def test_iris(self):

        # Generate full set of constraints for comparison with reference implementation
        n = self.iris_points.shape[0]
        mask = (self.iris_labels[None] == self.iris_labels[:, None])
        a, b = np.nonzero(np.triu(mask, k=1))
        c, d = np.nonzero(np.triu(~mask, k=1))

        # Full metric
        mmc = MMC(convergence_threshold=0.01)
        mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d]))
        expected = [[+0.000514, +0.000868, -0.001195, -0.001703],
                    [+0.000868, +0.001468, -0.002021, -0.002879],
                    [-0.001195, -0.002021, +0.002782, +0.003964],
                    [-0.001703, -0.002879, +0.003964, +0.005648]]
        assert_array_almost_equal(expected,
                                  mmc.get_mahalanobis_matrix(),
                                  decimal=6)

        # Diagonal metric
        mmc = MMC(diagonal=True)
        mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d]))
        expected = [0, 0, 1.210220, 1.228596]
        assert_array_almost_equal(np.diag(expected),
                                  mmc.get_mahalanobis_matrix(),
                                  decimal=6)

        # Supervised Full
        mmc = MMC_Supervised()
        mmc.fit(self.iris_points, self.iris_labels)
        csep = class_separation(mmc.transform(self.iris_points),
                                self.iris_labels)
        self.assertLess(csep, 0.15)

        # Supervised Diagonal
        mmc = MMC_Supervised(diagonal=True)
        mmc.fit(self.iris_points, self.iris_labels)
        csep = class_separation(mmc.transform(self.iris_points),
                                self.iris_labels)
        self.assertLess(csep, 0.2)
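
wrap_pairs, used in the test above, converts (X, [a, b, c, d]) index constraints into the (pairs, y) format that MMC.fit expects. A minimal sketch of the equivalent packing, assuming it mirrors metric_learn.constraints.wrap_pairs:

import numpy as np

def wrap_pairs_sketch(X, constraints):
    a, b, c, d = (np.asarray(idx) for idx in constraints)
    pair_idx = np.vstack([np.column_stack((a, b)), np.column_stack((c, d))])
    y = np.concatenate([np.ones_like(a), -np.ones_like(c)])
    return X[pair_idx], y  # pairs of points, +1 similar / -1 dissimilar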
Example #9
    if with_preprocessor:
        # if preprocessor, we build a 2D array of quadruplets of indices
        return Dataset(c, target, X, c[:, 0])
    else:
        # if not, we build a 3D array of quadruplets of samples
        return Dataset(X[c], target, None, X[c[:, 0]])


quadruplets_learners = [(LSML(), build_quadruplets)]
ids_quadruplets_learners = list(
    map(lambda x: x.__class__.__name__,
        [learner for (learner, _) in quadruplets_learners]))

pairs_learners = [
    (ITML(), build_pairs),
    (MMC(max_iter=2), build_pairs),  # max_iter=2 for faster
    (SDML(), build_pairs),
]
ids_pairs_learners = list(
    map(lambda x: x.__class__.__name__,
        [learner for (learner, _) in pairs_learners]))

classifiers = [(Covariance(), build_classification),
               (LFDA(), build_classification), (LMNN(), build_classification),
               (NCA(), build_classification), (RCA(), build_classification),
               (ITML_Supervised(max_iter=5), build_classification),
               (LSML_Supervised(), build_classification),
               (MMC_Supervised(max_iter=5), build_classification),
               (RCA_Supervised(num_chunks=10), build_classification),
               (SDML_Supervised(), build_classification)]
ids_classifiers = list(
    map(lambda x: x.__class__.__name__,
        [learner for (learner, _) in classifiers]))
Example #10
    if with_preprocessor:
        # if preprocessor, we build a 2D array of quadruplets of indices
        return Dataset(c, target, X, c[:, 0])
    else:
        # if not, we build a 3D array of quadruplets of samples
        return Dataset(X[c], target, None, X[c[:, 0]])


quadruplets_learners = [(LSML(), build_quadruplets)]
ids_quadruplets_learners = list(
    map(lambda x: x.__class__.__name__,
        [learner for (learner, _) in quadruplets_learners]))

pairs_learners = [
    (ITML(max_iter=2), build_pairs),  # max_iter=2 to be faster
    (MMC(max_iter=2), build_pairs),  # max_iter=2 to be faster
    (SDML(prior='identity', balance_param=1e-5), build_pairs)
]
ids_pairs_learners = list(
    map(lambda x: x.__class__.__name__,
        [learner for (learner, _) in pairs_learners]))

classifiers = [(Covariance(), build_classification),
               (LFDA(), build_classification), (LMNN(), build_classification),
               (NCA(), build_classification), (RCA(), build_classification),
               (ITML_Supervised(max_iter=5), build_classification),
               (LSML_Supervised(), build_classification),
               (MMC_Supervised(max_iter=5), build_classification),
               (RCA_Supervised(num_chunks=5), build_classification),
               (SDML_Supervised(prior='identity',
                                balance_param=1e-5), build_classification)]
Example #11
    def test_iris(self):

        # Generate full set of constraints for comparison with reference implementation
        mask = (self.iris_labels[None] == self.iris_labels[:, None])
        a, b = np.nonzero(np.triu(mask, k=1))
        c, d = np.nonzero(np.triu(~mask, k=1))

        # Full metric
        # (this snippet uses the pre-0.5 metric-learn API: fit(X, [a, b, c, d])
        # and .metric(); newer releases use wrap_pairs and
        # get_mahalanobis_matrix(), as in Example #8)
        mmc = MMC(convergence_threshold=0.01)
        mmc.fit(self.iris_points, [a, b, c, d])
        expected = [[+0.00046504, +0.00083371, -0.00111959, -0.00165265],
                    [+0.00083371, +0.00149466, -0.00200719, -0.00296284],
                    [-0.00111959, -0.00200719, +0.00269546, +0.00397881],
                    [-0.00165265, -0.00296284, +0.00397881, +0.00587320]]
        assert_array_almost_equal(expected, mmc.metric(), decimal=6)

        # Diagonal metric
        mmc = MMC(diagonal=True)
        mmc.fit(self.iris_points, [a, b, c, d])
        expected = [0, 0, 1.21045968, 1.22552608]
        assert_array_almost_equal(np.diag(expected), mmc.metric(), decimal=6)

        # Supervised Full
        mmc = MMC_Supervised()
        mmc.fit(self.iris_points, self.iris_labels)
        csep = class_separation(mmc.transform(), self.iris_labels)
        self.assertLess(csep, 0.15)

        # Supervised Diagonal
        mmc = MMC_Supervised(diagonal=True)
        mmc.fit(self.iris_points, self.iris_labels)
        csep = class_separation(mmc.transform(), self.iris_labels)
        self.assertLess(csep, 0.2)
Example #12
        pairs.append(pair)
        y.append(1)

    for row2 in normalized_n_pairs.iterrows():
        print('row2 : ', row2)
        lon2 = row2[1]["lon_0"]
        lat2 = row2[1]["lat_0"]
        alt2 = row2[1]["properties_alt_m"]
        gt_tuple2 = [lon2, lat2, alt2]

        obs_lon2 = row2[1]["position::longitude_degrees"]
        obs_lat2 = row2[1]["position::latitude_degrees"]
        obs_alt2 = row2[1]["position::altitude_meters"]
        obs_tuple2 = [obs_lon2, obs_lat2, obs_alt2]

        pair2 = [gt_tuple2, obs_tuple2]
        pairs.append(pair2)
        y.append(-1)

    print('debug : pairs >> ', pairs)
    print('debug : y >> ', y)

    # Run MMC: learn an MMC (Mahalanobis Metric for Clustering) model
    from metric_learn import MMC

    mmc = MMC()
    mmc.fit(pairs, y)  # learn the MMC model
    print("Mahalanobis Matrix : ", mmc.get_mahalanobis_matrix())
Example #13
  c, target = shuffle(c, target, random_state=SEED)
  if with_preprocessor:
    # if preprocessor, we build a 2D array of quadruplets of indices
    return Dataset(c, target, X, c[:, 0])
  else:
    # if not, we build a 3D array of quadruplets of samples
    return Dataset(X[c], target, None, X[c[:, 0]])


quadruplets_learners = [(LSML(), build_quadruplets)]
ids_quadruplets_learners = list(map(lambda x: x.__class__.__name__,
                                [learner for (learner, _) in
                                 quadruplets_learners]))

pairs_learners = [(ITML(), build_pairs),
                  (MMC(max_iter=2), build_pairs),  # max_iter=2 for faster
                  (SDML(use_cov=False, balance_param=1e-5), build_pairs)]
ids_pairs_learners = list(map(lambda x: x.__class__.__name__,
                              [learner for (learner, _) in
                               pairs_learners]))

classifiers = [(Covariance(), build_classification),
               (LFDA(), build_classification),
               (LMNN(), build_classification),
               (NCA(), build_classification),
               (RCA(), build_classification),
               (ITML_Supervised(max_iter=5), build_classification),
               (LSML_Supervised(), build_classification),
               (MMC_Supervised(max_iter=5), build_classification),
               (RCA_Supervised(num_chunks=10), build_classification),
               (SDML_Supervised(use_cov=False, balance_param=1e-5),
                build_classification)]