def learn_mmc_metric(X_test):
    """Learn one MMC Mahalanobis matrix per respondent (ids 1..20).

    For every respondent, samples sharing the same target value form
    similarity pairs and samples with different target values form
    dissimilarity pairs. An MMC model is fitted on ``X_test`` with those
    constraints and the resulting Mahalanobis matrix, scaled by 100, is
    stored under the key ``'R<id>'``.

    Parameters
    ----------
    X_test : object exposing ``.values`` and ``.shape``
        Feature table (``.values`` is handed to ``MMC.fit``).

    Returns
    -------
    dict
        Maps ``'R<id>'`` to the scaled Mahalanobis matrix. When fitting
        raises ``ValueError`` the entry is the identity matrix
        (``0.01 * I * 100``).
    """
    mmc_dict = dict()
    # The identity must match the feature dimension so that transforming it
    # yields the learned linear map itself.
    n_features = X_test.shape[1]
    # Loop-invariant: load the pickle once instead of once per respondent.
    all_targets = pd.read_pickle(r'../data/HCON/HCON_long_lik.pkl')

    for respondent_id in range(1, 21):
        y_test = pd.DataFrame(all_targets[respondent_id]).values.reshape(-1, 1)
        # mask[i, j] is True when samples i and j share the same target value.
        mask = (y_test[None] == y_test[:, None])[:, :, 0]
        a, b = np.nonzero(np.triu(mask, k=1))  # similarity pairs
        c, d = np.nonzero(np.triu(~mask, k=1))  # dissimilarity pairs
        mmc = MMC(convergence_threshold=0.001)
        try:
            mmc.fit(X_test.values, (a, b, c, d))
            L = mmc.transform(np.eye(n_features))
            M = np.dot(L, L.T)
        except ValueError:
            # Fitting is expected to converge; a ValueError here indicates a
            # degenerate constraint pattern in the input, so fall back to a
            # scaled identity metric.
            print(
                'R%d has no non-trivial dissimilarity constraints given for MMC.'
                % respondent_id)
            M = 0.01 * np.eye(n_features)
        mmc_dict['R%d' % respondent_id] = M * 100
        print('R:%2d' % respondent_id,
              ' First Row of MMC Mahalanobis Matrix:',
              (M[0] * 100).round(3))
    return mmc_dict
def fit(self, X, y=None, ml=None, cl=None):
    """Cluster ``X`` with k-means, optionally in an MMC-learned space.

    When both ``ml`` and ``cl`` are non-empty, an MMC model is fitted on
    ``X`` with the constraints derived from them and k-means is run on the
    MMC-transformed data. Otherwise k-means is run on ``X`` unchanged.

    Parameters
    ----------
    X : data handed to ``MMC.fit`` / ``KMeans.fit``.
    y : unused.
    ml : sequence of index pairs, optional
        Presumably must-link pairs -- confirm against callers.
    cl : sequence of index pairs, optional
        Presumably cannot-link pairs -- confirm against callers.

    Returns
    -------
    self
        With ``labels_`` set to the k-means cluster labels.
    """
    X_transformed = X
    # ``None`` defaults replace the previous mutable ``[]`` defaults; both
    # are falsy, so behavior without constraints is unchanged.
    if ml and cl:
        # Transpose each pair list into two index arrays, giving the
        # four-array constraint layout passed to MMC.
        constraints = [np.array(lst) for lst in [*zip(*ml), *zip(*cl)]]
        mmc = MMC(diagonal=self.diagonal)
        mmc.fit(X, constraints=constraints)
        X_transformed = mmc.transform(X)

    kmeans = KMeans(n_clusters=self.n_clusters, init='random',
                    max_iter=self.max_iter)
    kmeans.fit(X_transformed)
    self.labels_ = kmeans.labels_
    return self
def runMMC():
    """Learn an MMC (Mahalanobis Metrics for Clustering) model and print it.

    ``pairs`` and ``y`` are not parameters; they are looked up in the
    enclosing scope when the function is called.
    """
    from metric_learn import MMC

    model = MMC()
    model.fit(pairs, y)  # learn the MMC model
    learned_matrix = model.get_mahalanobis_matrix()
    print("Mahalanobis Matrix : ", learned_matrix)
def fit(self, X, y=None, constraints=None):
    """Fit MMC on ``X``, then run k-means on the MMC-transformed data.

    Parameters
    ----------
    X : data handed to ``MMC.fit`` and ``MMC.transform``.
    y : unused.
    constraints : forwarded unchanged to ``MMC.fit``.

    Returns
    -------
    self
        With ``labels_`` set to the k-means cluster labels.
    """
    metric_learner = MMC(diagonal=self.diagonal)
    metric_learner.fit(X, constraints=constraints)
    embedded = metric_learner.transform(X)

    clusterer = KMeans(
        n_clusters=self.n_clusters,
        init='random',
        max_iter=self.max_iter,
    )
    clusterer.fit(embedded)

    self.labels_ = clusterer.labels_
    return self
def main(args):
    """Learn an MMC metric from transfer accuracies and apply it to typology vectors.

    A language pair becomes a *similar* constraint when the transfer accuracy
    in both directions exceeds the target language's mean by more than
    ``alpha`` standard deviations, and a *dissimilar* constraint when it is
    below the mean by more than ``alpha`` standard deviations in both
    directions. MMC is fitted on the typology vectors of the languages that
    have transfer accuracies, and the learned transform is then applied to
    every typology vector and written to ``args.output_file``.

    Parameters
    ----------
    args : object with attributes
        ``transfer_acc`` (gzip-compressed pickle path, read),
        ``feature_path`` (gzip-compressed pickle path, read) and
        ``output_file`` (gzip-compressed pickle path, written).

    Raises
    ------
    ValueError
        If no similar or no dissimilar pair could be derived; MMC needs both
        kinds of constraints.
    """
    print("Deriving similar/dissimilar constraints for metric learning.")
    # NOTE(security): pickle.load can execute arbitrary code; only point
    # these paths at trusted files.
    with gzip.open(args.transfer_acc, "rb") as fr:
        # transfer_acc[tgt][src]: accuracy of src->tgt
        transfer_acc = pickle.load(fr)

    alpha = 0.5
    meta_langs = list(transfer_acc.keys())

    # Per-language thresholds, computed once instead of once per pair.
    upper = {}
    lower = {}
    for lang in meta_langs:
        accs = list(transfer_acc[lang].values())
        mu = mean(accs)
        sigma = stdev(accs)
        upper[lang] = mu + alpha * sigma
        lower[lang] = mu - alpha * sigma

    # Collect positional indices directly rather than language names that
    # would need a linear ``list.index`` lookup afterwards.
    sim_a, sim_b, dissim_a, dissim_b = [], [], [], []
    for i, l1 in enumerate(meta_langs):
        for j, l2 in enumerate(meta_langs[i + 1:], start=i + 1):
            if transfer_acc[l1][l2] > upper[l1] and \
                    transfer_acc[l2][l1] > upper[l2]:
                sim_a.append(i)
                sim_b.append(j)
            elif transfer_acc[l1][l2] < lower[l1] and \
                    transfer_acc[l2][l1] < lower[l2]:
                dissim_a.append(i)
                dissim_b.append(j)

    if not sim_a or not dissim_a:
        raise ValueError(
            "Cannot learn a metric: found %d similar and %d dissimilar "
            "language pairs, but both kinds are required."
            % (len(sim_a), len(dissim_a)))

    # constraints: [simA, simB, dissimA, dissimB]
    constraints = [
        np.array(idx) for idx in (sim_a, sim_b, dissim_a, dissim_b)
    ]

    print("Mahalanobis metric learning.")
    with gzip.open(args.feature_path, "rb") as fr:
        typology_vec = pickle.load(fr)
    meta_X = np.array([typology_vec[l] for l in meta_langs])
    mmc = MMC()
    mmc.fit(meta_X, constraints)

    print("Apply the learned metric to the full typology vector space.")
    all_langs = list(typology_vec.keys())
    X = np.array([typology_vec[l] for l in all_langs])
    X = mmc.transform(X).tolist()
    typology_vec_transformed = dict(zip(all_langs, X))
    with gzip.open(args.output_file, "wb") as fw:
        pickle.dump(typology_vec_transformed, fw)
def test_iris(self):
    """Check MMC and MMC_Supervised on iris against reference values."""
    # Generate full set of constraints for comparison with reference
    # implementation: (a, b) index same-label pairs, (c, d) index
    # different-label pairs, each pair counted once (upper triangle).
    mask = (self.iris_labels[None] == self.iris_labels[:, None])
    a, b = np.nonzero(np.triu(mask, k=1))
    c, d = np.nonzero(np.triu(~mask, k=1))

    # Full metric
    mmc = MMC(convergence_threshold=0.01)
    mmc.fit(self.iris_points, [a, b, c, d])
    expected = [[0.000514, 0.000868, -0.001195, -0.001703],
                [0.000868, 0.001468, -0.002021, -0.002879],
                [-0.001195, -0.002021, 0.002782, 0.003964],
                [-0.001703, -0.002879, 0.003964, 0.005648]]
    assert_array_almost_equal(expected, mmc.metric(), decimal=6)

    # Diagonal metric
    mmc = MMC(diagonal=True)
    mmc.fit(self.iris_points, [a, b, c, d])
    expected = [0, 0, 1.210220, 1.228596]
    assert_array_almost_equal(np.diag(expected), mmc.metric(), decimal=6)

    # Supervised Full
    mmc = MMC_Supervised()
    mmc.fit(self.iris_points, self.iris_labels)
    csep = class_separation(mmc.transform(), self.iris_labels)
    self.assertLess(csep, 0.15)

    # Supervised Diagonal
    mmc = MMC_Supervised(diagonal=True)
    mmc.fit(self.iris_points, self.iris_labels)
    csep = class_separation(mmc.transform(), self.iris_labels)
    self.assertLess(csep, 0.2)
def test_iris(self):
    """Check MMC and MMC_Supervised on iris against reference values."""
    # Build the full constraint set for comparison with the reference
    # implementation; only the upper triangle is used so each pair is
    # listed once.
    same_label = (self.iris_labels[None] == self.iris_labels[:, None])
    sim_i, sim_j = np.nonzero(np.triu(same_label, k=1))
    dis_i, dis_j = np.nonzero(np.triu(~same_label, k=1))

    # Full metric
    full_expected = [
        [+0.00046504, +0.00083371, -0.00111959, -0.00165265],
        [+0.00083371, +0.00149466, -0.00200719, -0.00296284],
        [-0.00111959, -0.00200719, +0.00269546, +0.00397881],
        [-0.00165265, -0.00296284, +0.00397881, +0.00587320],
    ]
    full_model = MMC(convergence_threshold=0.01)
    full_model.fit(self.iris_points, [sim_i, sim_j, dis_i, dis_j])
    assert_array_almost_equal(full_expected, full_model.metric(), decimal=6)

    # Diagonal metric
    diag_expected = [0, 0, 1.21045968, 1.22552608]
    diag_model = MMC(diagonal=True)
    diag_model.fit(self.iris_points, [sim_i, sim_j, dis_i, dis_j])
    assert_array_almost_equal(np.diag(diag_expected), diag_model.metric(),
                              decimal=6)

    # Supervised Full
    supervised = MMC_Supervised()
    supervised.fit(self.iris_points, self.iris_labels)
    separation = class_separation(supervised.transform(), self.iris_labels)
    self.assertLess(separation, 0.15)

    # Supervised Diagonal
    supervised_diag = MMC_Supervised(diagonal=True)
    supervised_diag.fit(self.iris_points, self.iris_labels)
    separation = class_separation(supervised_diag.transform(),
                                  self.iris_labels)
    self.assertLess(separation, 0.2)
def learn_distance_metric(distances, pairs_per_prototype=100, test_size=0.5,
                          return_features=False, return_pairs=False):
    """Fit an MMC metric on pairs derived from ``distances`` and score it.

    Features are produced by a ``DateFeatureTransformer`` ->
    ``MMCFeatureTransformer`` pipeline and handed to MMC as its preprocessor.
    Pairs come from ``create_mmc_pairs``; the first two columns of each pair
    row are the model input and the last column is the label. The pairs are
    split with stratification on the label, MMC is fitted on the training
    part, and the weighted F1 score is computed on the held-out part.

    Parameters
    ----------
    distances : input forwarded to the feature pipeline and
        ``create_mmc_pairs``.
    pairs_per_prototype : int
        Forwarded to ``create_mmc_pairs``.
    test_size : float
        Forwarded to ``train_test_split``.
    return_features : bool
        Include the computed features in the result (otherwise ``None``).
    return_pairs : bool
        Include the generated pairs in the result (otherwise ``None``).

    Returns
    -------
    types.SimpleNamespace
        With attributes ``score``, ``metric_components`` (transposed
        ``mmc.components_``), ``features`` and ``pairs``.
    """
    feature_pipeline = Pipeline([
        ('dates', DateFeatureTransformer()),
        ('features', MMCFeatureTransformer()),
    ])
    features = feature_pipeline.fit_transform(distances)
    pairs = create_mmc_pairs(distances,
                             pairs_per_prototype=pairs_per_prototype)
    X_train, X_test, y_train, y_test = train_test_split(
        pairs[:, :2],
        pairs[:, -1],
        shuffle=True,
        stratify=pairs[:, -1],
        test_size=test_size,
    )
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy (1.24+); the builtin gives the same float64 dtype.
    mmc = MMC(preprocessor=np.array(features, dtype=float))
    mmc = mmc.fit(X_train, y_train)
    score = f1_score(y_test, mmc.predict(X_test), average='weighted')
    return SimpleNamespace(
        score=score,
        metric_components=mmc.components_.transpose(),
        features=None if not return_features else features,
        pairs=None if not return_pairs else pairs,
    )
def test_iris(self):
    """Check MMC and MMC_Supervised on iris against reference values."""
    # Build the full constraint set for comparison with the reference
    # implementation; only the upper triangle is used so each pair is
    # listed once.
    same_label = self.iris_labels[None] == self.iris_labels[:, None]
    sim_i, sim_j = np.nonzero(np.triu(same_label, k=1))
    dis_i, dis_j = np.nonzero(np.triu(~same_label, k=1))

    # Full metric
    dim = self.iris_points.shape[1]
    full_expected = [
        [+0.000514, +0.000868, -0.001195, -0.001703],
        [+0.000868, +0.001468, -0.002021, -0.002879],
        [-0.001195, -0.002021, +0.002782, +0.003964],
        [-0.001703, -0.002879, +0.003964, +0.005648],
    ]
    full_model = MMC(convergence_threshold=0.01, init=np.eye(dim) / 10)
    wrapped = wrap_pairs(self.iris_points, [sim_i, sim_j, dis_i, dis_j])
    full_model.fit(*wrapped)
    assert_array_almost_equal(full_expected,
                              full_model.get_mahalanobis_matrix(),
                              decimal=6)

    # Diagonal metric
    diag_expected = [0, 0, 1.210220, 1.228596]
    diag_model = MMC(diagonal=True)
    wrapped = wrap_pairs(self.iris_points, [sim_i, sim_j, dis_i, dis_j])
    diag_model.fit(*wrapped)
    assert_array_almost_equal(np.diag(diag_expected),
                              diag_model.get_mahalanobis_matrix(),
                              decimal=6)

    # Supervised Full
    supervised = MMC_Supervised()
    supervised.fit(self.iris_points, self.iris_labels)
    embedded = supervised.transform(self.iris_points)
    self.assertLess(class_separation(embedded, self.iris_labels), 0.15)

    # Supervised Diagonal
    supervised_diag = MMC_Supervised(diagonal=True)
    supervised_diag.fit(self.iris_points, self.iris_labels)
    embedded = supervised_diag.transform(self.iris_points)
    self.assertLess(class_separation(embedded, self.iris_labels), 0.2)
def test_iris(self):
    """Check MMC and MMC_Supervised on iris against reference values."""
    # Generate full set of constraints for comparison with reference
    # implementation: (a, b) index same-label pairs, (c, d) index
    # different-label pairs, each pair counted once (upper triangle).
    mask = (self.iris_labels[None] == self.iris_labels[:, None])
    a, b = np.nonzero(np.triu(mask, k=1))
    c, d = np.nonzero(np.triu(~mask, k=1))

    # Full metric
    mmc = MMC(convergence_threshold=0.01)
    mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d]))
    expected = [[+0.000514, +0.000868, -0.001195, -0.001703],
                [+0.000868, +0.001468, -0.002021, -0.002879],
                [-0.001195, -0.002021, +0.002782, +0.003964],
                [-0.001703, -0.002879, +0.003964, +0.005648]]
    assert_array_almost_equal(expected, mmc.get_mahalanobis_matrix(),
                              decimal=6)

    # Diagonal metric
    mmc = MMC(diagonal=True)
    mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d]))
    expected = [0, 0, 1.210220, 1.228596]
    assert_array_almost_equal(np.diag(expected),
                              mmc.get_mahalanobis_matrix(),
                              decimal=6)

    # Supervised Full
    mmc = MMC_Supervised()
    mmc.fit(self.iris_points, self.iris_labels)
    csep = class_separation(mmc.transform(self.iris_points),
                            self.iris_labels)
    self.assertLess(csep, 0.15)

    # Supervised Diagonal
    mmc = MMC_Supervised(diagonal=True)
    mmc.fit(self.iris_points, self.iris_labels)
    csep = class_separation(mmc.transform(self.iris_points),
                            self.iris_labels)
    self.assertLess(csep, 0.2)
""" from metric_learn import MMC pairs = [[[1.2, 7.5], [1.3, 1.5]], [[6.4, 2.6], [6.2, 9.7]], [[1.3, 4.5], [3.2, 4.6]], [[6.2, 5.5], [5.4, 5.4]]] # in this task we want points where the first feature is close to be closer to each other, # no matter how close the second feature is y = [1, 1, -1, -1] """ Learn MMC (Mahalanobis Metrics for Clustering) Model """ mmc = MMC() mmc.fit(pairs, y) # learn the MMC model """ Return the decision function used to classify the pairs """ print("debug 1: ", mmc.decision_function(pairs)) """ Returns a copy of the Mahalanobis matrix learned by the metric learner """ print("debug 2: ", mmc.get_mahalanobis_matrix()) """ Returns a function that takes as input two 1D arrays and outputs the learned metric score on these two points. """ f = mmc.get_metric() print("debug 3: ", f) """ Predicts the learned metric between input pairs