import numpy as np
import pandas as pd
from metric_learn import MMC


def learn_mmc_metric(X_test):
    mmc_dict = dict()
    for respondent_id in range(1, 21):
        y_test = pd.DataFrame(
            pd.read_pickle(r'../data/HCON/HCON_long_lik.pkl')
            [respondent_id]).values.reshape(-1, 1)
        mask = (y_test[None] == y_test[:, None])[:, :, 0]
        a, b = np.nonzero(np.triu(mask, k=1))    # similarity pairs
        c, d = np.nonzero(np.triu(~mask, k=1))   # dissimilarity pairs
        mmc = MMC(convergence_threshold=0.001)
        try:
            mmc.fit(X_test.values, (a, b, c, d))
            # transforming the identity recovers the learned components;
            # M = L L^T is then the Mahalanobis matrix
            L = mmc.transform(np.diag(np.ones(9)))
            M = np.dot(L, L.T)
        except ValueError:
            # The fit should normally converge; a ValueError signals
            # malformed input, e.g. no non-trivial dissimilarity constraints.
            print('R%d has no non-trivial dissimilarity constraints given for MMC.'
                  % respondent_id)
            M = 0.01 * np.diag(np.ones(9))
        mmc_dict['R%d' % respondent_id] = M * 100
        print('R:%2d' % respondent_id,
              ' First Row of MMC Mahalanobis Matrix:', (M[0] * 100).round(3))
    return mmc_dict
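# A minimal sketch of how a matrix returned by learn_mmc_metric might be used:
# the squared Mahalanobis distance between two points x and y under M is
# (x - y)^T M (x - y). The vectors below are illustrative, and M is the
# identity fallback from the except branch, not a learned matrix.
import numpy as np

M = 0.01 * np.diag(np.ones(9)) * 100   # fallback matrix, scaled as in mmc_dict
x = np.random.rand(9)
y = np.random.rand(9)
diff = x - y
sq_dist = diff @ M @ diff              # squared Mahalanobis distance under M
print('squared Mahalanobis distance:', round(float(sq_dist), 4))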
def learn_distance_metric(distances, pairs_per_prototype=100,
                          test_size=0.5, return_features=False,
                          return_pairs=False):
    feature_pipeline = Pipeline([
        ('dates', DateFeatureTransformer()),
        ('features', MMCFeatureTransformer()),
    ])
    features = feature_pipeline.fit_transform(distances)
    pairs = create_mmc_pairs(distances,
                             pairs_per_prototype=pairs_per_prototype)
    X_train, X_test, y_train, y_test = train_test_split(
        pairs[:, :2], pairs[:, -1],
        shuffle=True, stratify=pairs[:, -1], test_size=test_size)
    mmc = MMC(preprocessor=np.array(features, dtype=float))
    mmc = mmc.fit(X_train, y_train)
    score = f1_score(y_test, mmc.predict(X_test), average='weighted')
    return SimpleNamespace(
        score=score,
        metric_components=mmc.components_.transpose(),
        features=None if not return_features else features,
        pairs=None if not return_pairs else pairs,
    )
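# The function above passes MMC *index* pairs together with a `preprocessor`
# array, letting the learner look feature vectors up by row index. A minimal
# runnable sketch of that calling pattern with synthetic data (all names and
# sizes here are illustrative, not from the original project):
import numpy as np
from metric_learn import MMC

rng = np.random.RandomState(0)
features = rng.rand(20, 4)                   # 20 samples, 4 features
a = rng.randint(0, 20, size=30)
b = (a + rng.randint(1, 20, size=30)) % 20   # ensure the two indices differ
index_pairs = np.stack([a, b], axis=1)       # shape (30, 2): row indices
labels = np.concatenate([np.ones(15, dtype=int), -np.ones(15, dtype=int)])

mmc = MMC(preprocessor=features)
mmc.fit(index_pairs, labels)                 # indices resolved via preprocessor
print(mmc.decision_function(index_pairs[:3]))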
def fit(self, X, y=None, ml=[], cl=[]):
    X_transformed = X
    if ml and cl:
        # Alternative: expand transitive closures of the constraints first:
        # ml_graph, cl_graph, _ = preprocess_constraints(ml, cl, X.shape[0])
        # ml = [(i, j) for i, cs in ml_graph.items() for j in cs]
        # cl = [(i, j) for i, cs in cl_graph.items() for j in cs]
        constraints = [np.array(lst) for lst in [*zip(*ml), *zip(*cl)]]
        mmc = MMC(diagonal=self.diagonal)
        mmc.fit(X, constraints=constraints)
        X_transformed = mmc.transform(X)
    kmeans = KMeans(n_clusters=self.n_clusters, init='random',
                    max_iter=self.max_iter)
    kmeans.fit(X_transformed)
    self.labels_ = kmeans.labels_
    return self
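# The [*zip(*ml), *zip(*cl)] idiom above transposes lists of (i, j) tuples
# into the four index arrays (a, b, c, d) that this MMC constraints API
# expects. A small worked example with made-up indices:
import numpy as np

ml = [(0, 1), (2, 3)]   # must-link pairs
cl = [(0, 2), (1, 3)]   # cannot-link pairs
constraints = [np.array(lst) for lst in [*zip(*ml), *zip(*cl)]]
print(constraints)
# -> [array([0, 2]), array([1, 3]), array([0, 1]), array([2, 3])]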
def runMMC(pairs, y):
    # Run MMC
    from metric_learn import MMC

    """ Learn MMC (Mahalanobis Metric for Clustering) Model """
    mmc = MMC()
    mmc.fit(pairs, y)  # learn the MMC model
    print("Mahalanobis Matrix : ", mmc.get_mahalanobis_matrix())
def fit(self, X, y=None, constraints=None):
    mmc = MMC(diagonal=self.diagonal)
    mmc.fit(X, constraints=constraints)
    X_transformed = mmc.transform(X)
    kmeans = KMeans(n_clusters=self.n_clusters, init='random',
                    max_iter=self.max_iter)
    kmeans.fit(X_transformed)
    self.labels_ = kmeans.labels_
    return self
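# Usage sketch for the fit method above, assuming it belongs to an MMC-plus-
# KMeans wrapper class (the name `MMCKMeans` below is hypothetical). The
# constraints follow the same (a, b, c, d) index-array convention used by the
# other snippets in this section.
import numpy as np

X = np.random.rand(10, 3)
constraints = [np.array([0, 1]), np.array([2, 3]),   # similar pairs (a, b)
               np.array([0, 2]), np.array([1, 3])]   # dissimilar pairs (c, d)
# clusterer = MMCKMeans(n_clusters=2, diagonal=False, max_iter=100)
# clusterer.fit(X, constraints=constraints)
# print(clusterer.labels_)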
def main(args):
    print("Deriving similar/dissimilar constraints for metric learning.")
    with gzip.open(args.transfer_acc, "rb") as fr:
        # transfer_acc[tgt][src]: accuracy of src->tgt
        transfer_acc = pickle.load(fr)
    _mean = {l: mean(list(transfer_acc[l].values()))
             for l in transfer_acc.keys()}
    _std = {l: stdev(list(transfer_acc[l].values()))
            for l in transfer_acc.keys()}
    alpha = 0.5
    sim_pairs = []
    dissim_pairs = []
    meta_langs = list(transfer_acc.keys())
    for i in range(len(meta_langs)):
        for j in range(i + 1, len(meta_langs)):
            l1 = meta_langs[i]
            l2 = meta_langs[j]
            if transfer_acc[l1][l2] > _mean[l1] + alpha * _std[l1] and \
                    transfer_acc[l2][l1] > _mean[l2] + alpha * _std[l2]:
                sim_pairs.append([l1, l2])
            elif transfer_acc[l1][l2] < _mean[l1] - alpha * _std[l1] and \
                    transfer_acc[l2][l1] < _mean[l2] - alpha * _std[l2]:
                dissim_pairs.append([l1, l2])
    # constraints: [simA, simB, dissimA, dissimB]
    constraints = list(zip(*sim_pairs)) + list(zip(*dissim_pairs))
    constraints = [list(map(lambda l: meta_langs.index(l), lst))
                   for lst in constraints]
    constraints = [np.array(x) for x in constraints]

    print("Mahalanobis metric learning.")
    with gzip.open(args.feature_path, "rb") as fr:
        typology_vec = pickle.load(fr)
    meta_X = np.array([typology_vec[l] for l in meta_langs])
    mmc = MMC()
    mmc.fit(meta_X, constraints)

    print("Apply the learned metric to the full typology vector space.")
    all_langs = list(typology_vec.keys())
    X = np.array([typology_vec[l] for l in all_langs])
    X = mmc.transform(X).tolist()
    typology_vec_transformed = {all_langs[i]: X[i]
                                for i in range(len(all_langs))}
    with gzip.open(args.output_file, "wb") as fw:
        pickle.dump(typology_vec_transformed, fw)
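# Worked example of the thresholding rule above with made-up accuracies: a
# language pair is marked similar only when transfer accuracy exceeds
# mean + alpha * std in *both* directions.
from statistics import mean, stdev

acc = {'en': {'de': 0.80, 'fr': 0.75, 'zh': 0.40},
       'de': {'en': 0.78, 'fr': 0.70, 'zh': 0.35}}
alpha = 0.5
m_en, s_en = mean(acc['en'].values()), stdev(acc['en'].values())
m_de, s_de = mean(acc['de'].values()), stdev(acc['de'].values())
print(acc['en']['de'] > m_en + alpha * s_en and
      acc['de']['en'] > m_de + alpha * s_de)   # True -> similar pair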
"""
Example adapted from the metric-learn documentation:
http://contrib.scikit-learn.org/metric-learn/generated/metric_learn.MMC.html#metric_learn.MMC
"""
from metric_learn import MMC

pairs = [[[1.2, 7.5], [1.3, 1.5]],
         [[6.4, 2.6], [6.2, 9.7]],
         [[1.3, 4.5], [3.2, 4.6]],
         [[6.2, 5.5], [5.4, 5.4]]]
# In this task we want pairs whose first features are close to be considered
# similar, no matter how close their second features are.
y = [1, 1, -1, -1]

""" Learn MMC (Mahalanobis Metric for Clustering) Model """
mmc = MMC()
mmc.fit(pairs, y)  # learn the MMC model

""" Return the decision function used to classify the pairs """
print("debug 1: ", mmc.decision_function(pairs))

""" Return a copy of the Mahalanobis matrix learned by the metric learner """
print("debug 2: ", mmc.get_mahalanobis_matrix())

""" Return a function that takes as input two 1-D arrays and outputs the
learned metric score on these two points. """
f = mmc.get_metric()
print("debug 3: ", f)
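# Continuation of the example above: `f` returned by get_metric() is a plain
# callable, so the learned distance between two points is obtained by applying
# it directly (debug 3 only prints the function object itself).
import numpy as np

x1 = np.array([1.2, 7.5])
x2 = np.array([1.3, 1.5])
print("debug 4: ", f(x1, x2))   # learned metric between x1 and x2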
def test_iris(self):
    # Generate the full set of constraints for comparison with the
    # reference implementation.
    n = self.iris_points.shape[0]
    mask = (self.iris_labels[None] == self.iris_labels[:, None])
    a, b = np.nonzero(np.triu(mask, k=1))
    c, d = np.nonzero(np.triu(~mask, k=1))

    # Full metric
    mmc = MMC(convergence_threshold=0.01)
    mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d]))
    expected = [[+0.000514, +0.000868, -0.001195, -0.001703],
                [+0.000868, +0.001468, -0.002021, -0.002879],
                [-0.001195, -0.002021, +0.002782, +0.003964],
                [-0.001703, -0.002879, +0.003964, +0.005648]]
    assert_array_almost_equal(expected, mmc.get_mahalanobis_matrix(),
                              decimal=6)

    # Diagonal metric
    mmc = MMC(diagonal=True)
    mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d]))
    expected = [0, 0, 1.210220, 1.228596]
    assert_array_almost_equal(np.diag(expected),
                              mmc.get_mahalanobis_matrix(), decimal=6)

    # Supervised Full
    mmc = MMC_Supervised()
    mmc.fit(self.iris_points, self.iris_labels)
    csep = class_separation(mmc.transform(self.iris_points),
                            self.iris_labels)
    self.assertLess(csep, 0.15)

    # Supervised Diagonal
    mmc = MMC_Supervised(diagonal=True)
    mmc.fit(self.iris_points, self.iris_labels)
    csep = class_separation(mmc.transform(self.iris_points),
                            self.iris_labels)
    self.assertLess(csep, 0.2)
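# `wrap_pairs` is a test helper from the metric-learn test suite. A sketch of
# its assumed behavior: it turns the (a, b, c, d) index arrays into the
# (pairs, y) format that MMC.fit expects, with +1 labels for similar and -1
# for dissimilar pairs.
import numpy as np

def wrap_pairs_sketch(X, constraints):
    a, b, c, d = [np.asarray(lst) for lst in constraints]
    pairs = np.vstack([np.stack([X[a], X[b]], axis=1),    # similar pairs
                       np.stack([X[c], X[d]], axis=1)])   # dissimilar pairs
    y = np.concatenate([np.ones(len(a)), -np.ones(len(c))])
    return pairs, y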
if with_preprocessor:
    # if preprocessor, we build a 2D array of quadruplets of indices
    return Dataset(c, target, X, c[:, 0])
else:
    # if not, we build a 3D array of quadruplets of samples
    return Dataset(X[c], target, None, X[c[:, 0]])


quadruplets_learners = [(LSML(), build_quadruplets)]
ids_quadruplets_learners = list(
    map(lambda x: x.__class__.__name__,
        [learner for (learner, _) in quadruplets_learners]))

pairs_learners = [
    (ITML(), build_pairs),
    (MMC(max_iter=2), build_pairs),  # max_iter=2 for speed
    (SDML(), build_pairs),
]
ids_pairs_learners = list(
    map(lambda x: x.__class__.__name__,
        [learner for (learner, _) in pairs_learners]))

classifiers = [(Covariance(), build_classification),
               (LFDA(), build_classification),
               (LMNN(), build_classification),
               (NCA(), build_classification),
               (RCA(), build_classification),
               (ITML_Supervised(max_iter=5), build_classification),
               (LSML_Supervised(), build_classification),
               (MMC_Supervised(max_iter=5), build_classification),
               (RCA_Supervised(num_chunks=10), build_classification),
               (SDML_Supervised(), build_classification)]
ids_classifiers = list(
if with_preprocessor:
    # if preprocessor, we build a 2D array of quadruplets of indices
    return Dataset(c, target, X, c[:, 0])
else:
    # if not, we build a 3D array of quadruplets of samples
    return Dataset(X[c], target, None, X[c[:, 0]])


quadruplets_learners = [(LSML(), build_quadruplets)]
ids_quadruplets_learners = list(
    map(lambda x: x.__class__.__name__,
        [learner for (learner, _) in quadruplets_learners]))

pairs_learners = [
    (ITML(max_iter=2), build_pairs),  # max_iter=2 to be faster
    (MMC(max_iter=2), build_pairs),   # max_iter=2 to be faster
    (SDML(prior='identity', balance_param=1e-5), build_pairs),
]
ids_pairs_learners = list(
    map(lambda x: x.__class__.__name__,
        [learner for (learner, _) in pairs_learners]))

classifiers = [(Covariance(), build_classification),
               (LFDA(), build_classification),
               (LMNN(), build_classification),
               (NCA(), build_classification),
               (RCA(), build_classification),
               (ITML_Supervised(max_iter=5), build_classification),
               (LSML_Supervised(), build_classification),
               (MMC_Supervised(max_iter=5), build_classification),
               (RCA_Supervised(num_chunks=5), build_classification),
               (SDML_Supervised(prior='identity', balance_param=1e-5),
                build_classification)]
def test_iris(self):
    # Note: this variant uses the pre-0.5 metric-learn API, where the
    # Mahalanobis matrix is read via mmc.metric() and transform() with no
    # arguments transforms the training data.
    # Generate the full set of constraints for comparison with the
    # reference implementation.
    mask = (self.iris_labels[None] == self.iris_labels[:, None])
    a, b = np.nonzero(np.triu(mask, k=1))
    c, d = np.nonzero(np.triu(~mask, k=1))

    # Full metric
    mmc = MMC(convergence_threshold=0.01)
    mmc.fit(self.iris_points, [a, b, c, d])
    expected = [[+0.00046504, +0.00083371, -0.00111959, -0.00165265],
                [+0.00083371, +0.00149466, -0.00200719, -0.00296284],
                [-0.00111959, -0.00200719, +0.00269546, +0.00397881],
                [-0.00165265, -0.00296284, +0.00397881, +0.00587320]]
    assert_array_almost_equal(expected, mmc.metric(), decimal=6)

    # Diagonal metric
    mmc = MMC(diagonal=True)
    mmc.fit(self.iris_points, [a, b, c, d])
    expected = [0, 0, 1.21045968, 1.22552608]
    assert_array_almost_equal(np.diag(expected), mmc.metric(), decimal=6)

    # Supervised Full
    mmc = MMC_Supervised()
    mmc.fit(self.iris_points, self.iris_labels)
    csep = class_separation(mmc.transform(), self.iris_labels)
    self.assertLess(csep, 0.15)

    # Supervised Diagonal
    mmc = MMC_Supervised(diagonal=True)
    mmc.fit(self.iris_points, self.iris_labels)
    csep = class_separation(mmc.transform(), self.iris_labels)
    self.assertLess(csep, 0.2)
    pairs.append(pair)   # tail of the preceding loop over similar pairs
    y.append(1)

for row2 in normalized_n_pairs.iterrows():
    print('row2 : ', row2)
    lon2 = row2[1]["lon_0"]
    lat2 = row2[1]["lat_0"]
    alt2 = row2[1]["properties_alt_m"]
    gt_tuple2 = [lon2, lat2, alt2]
    obs_lon2 = row2[1]["position::longitude_degrees"]
    obs_lat2 = row2[1]["position::latitude_degrees"]
    obs_alt2 = row2[1]["position::altitude_meters"]
    obs_tuple2 = [obs_lon2, obs_lat2, obs_alt2]
    pair2 = [gt_tuple2, obs_tuple2]
    pairs.append(pair2)
    y.append(-1)

print('debug : pairs >> ', pairs)
print('debug : y >> ', y)

# Run MMC
from metric_learn import MMC

""" Learn MMC (Mahalanobis Metric for Clustering) Model """
mmc = MMC()
mmc.fit(pairs, y)  # learn the MMC model
print("Mahalanobis Matrix : ", mmc.get_mahalanobis_matrix())
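# Shape check for the pairs built above: MMC expects a 3-D array of shape
# (n_pairs, 2, n_features) with labels in {+1, -1}. The coordinates below are
# illustrative ground-truth/observation triples (lon, lat, alt):
import numpy as np

pairs_demo = np.array([[[2.35, 48.85, 35.0], [2.36, 48.86, 34.0]],    # similar
                       [[2.35, 48.85, 35.0], [13.40, 52.52, 80.0]]])  # dissimilar
y_demo = np.array([1, -1])
print(pairs_demo.shape)   # (2, 2, 3)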
    c, target = shuffle(c, target, random_state=SEED)
    if with_preprocessor:
        # if preprocessor, we build a 2D array of quadruplets of indices
        return Dataset(c, target, X, c[:, 0])
    else:
        # if not, we build a 3D array of quadruplets of samples
        return Dataset(X[c], target, None, X[c[:, 0]])


quadruplets_learners = [(LSML(), build_quadruplets)]
ids_quadruplets_learners = list(
    map(lambda x: x.__class__.__name__,
        [learner for (learner, _) in quadruplets_learners]))

pairs_learners = [(ITML(), build_pairs),
                  (MMC(max_iter=2), build_pairs),  # max_iter=2 for speed
                  (SDML(use_cov=False, balance_param=1e-5), build_pairs)]
ids_pairs_learners = list(
    map(lambda x: x.__class__.__name__,
        [learner for (learner, _) in pairs_learners]))

classifiers = [(Covariance(), build_classification),
               (LFDA(), build_classification),
               (LMNN(), build_classification),
               (NCA(), build_classification),
               (RCA(), build_classification),
               (ITML_Supervised(max_iter=5), build_classification),
               (LSML_Supervised(), build_classification),
               (MMC_Supervised(max_iter=5), build_classification),
               (RCA_Supervised(num_chunks=10), build_classification),
               (SDML_Supervised(use_cov=False, balance_param=1e-5),