def test_lsml_supervised(self):
  """fit-then-transform must equal fit_transform for the same random state."""
  rng = np.random.RandomState(1234)
  model = LSML_Supervised(n_constraints=200, random_state=rng)
  model.fit(self.X, self.y)
  transformed_separately = model.transform(self.X)

  # Re-seed identically so both runs draw the same constraints.
  rng = np.random.RandomState(1234)
  model = LSML_Supervised(n_constraints=200, random_state=rng)
  transformed_jointly = model.fit_transform(self.X, self.y)

  assert_array_almost_equal(transformed_separately, transformed_jointly)
def sandwich_demo():
  """Plot the sandwich data in input space and in each learned metric space.

  Lays out a 3x2 grid: the whole top row shows the raw data with its
  2-nearest-neighbor graph; the remaining four axes show the same data
  after transformation by each metric learner.
  """
  x, y = sandwich_data()
  knn = nearest_neighbors(x, k=2)
  ax = plt.subplot(3, 1, 1)  # take the whole top row
  plot_sandwich_data(x, y, ax)
  plot_neighborhood_graph(x, knn, y, ax)
  ax.set_title('input space')
  ax.set_aspect('equal')
  ax.set_xticks([])
  ax.set_yticks([])

  mls = [
      LMNN(),
      ITML_Supervised(num_constraints=200),
      SDML_Supervised(num_constraints=200),
      LSML_Supervised(num_constraints=200),
  ]

  for ax_num, ml in enumerate(mls, start=3):
    ml.fit(x, y)
    # Pass the data explicitly: zero-argument transform() was removed from
    # recent metric-learn releases, while transform(x) works on all versions.
    tx = ml.transform(x)
    ml_knn = nearest_neighbors(tx, k=2)
    ax = plt.subplot(3, 2, ax_num)
    plot_sandwich_data(tx, y, axis=ax)
    plot_neighborhood_graph(tx, ml_knn, y, axis=ax)
    ax.set_title(ml.__class__.__name__)
    ax.set_xticks([])
    ax.set_yticks([])
  plt.show()
def test_iris(self):
  """LSML should give at least weak class separation on the iris data."""
  model = LSML_Supervised(num_constraints=200)
  model.fit(self.iris_points, self.iris_labels)
  embedded = model.transform(self.iris_points)
  csep = class_separation(embedded, self.iris_labels)
  self.assertLess(csep, 0.8)  # it's pretty terrible
def get_dist_func(data: Array[np.float64], target: Array[np.float64]
                  ) -> Callable[[Callable[[np.float64, np.float64], np.float64], int, int], np.float64]:
    """
    Get function that returns distances between examples in learned space.

    Args:
        data : Array[np.float64] - training data
        target : Array[np.float64] - target variable values (classes of
            training examples)

    Returns:
        Callable[[Callable[[np.float64, np.float64], np.float64], int, int], np.float64]
        -- higher-order function that takes a metric function and returns a
        function that takes two indices of examples and returns the distance
        between those examples in the learned metric space.
    """

    # Transform the training data into the learned metric space.
    # NOTE: annotations use plain ``int`` instead of ``np.int``, which was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
    data_trans: Array[np.float64] = LSML_Supervised().fit_transform(
        StandardScaler().fit_transform(data), target)

    def dist_func_res(metric: Callable[[np.float64, np.float64], np.float64],
                      i1: int, i2: int) -> np.float64:
        """
        Distance function that takes indices of examples in the training set
        and returns the distance in learned space using the specified metric.

        Args:
            metric : callable computing a distance between two vectors
            i1 : int - index of first training example
            i2 : int - index of second training example

        Returns:
            np.float64 - distance in learned metric space, using the
            specified metric, between the specified training examples.
        """
        # Compute distance in learned metric space using specified metric.
        return metric(data_trans[i1, :], data_trans[i2, :])

    return dist_func_res  # Return distance function.
def process_lsml(self, **option):
    '''Metric Learning algorithm: LSML'''
    # Fit LSML on the training expression data and cache its linear map.
    features = self.GeneExp_train
    labels = self.Label_train
    learner = LSML_Supervised(**option)
    learner.fit(features, labels)
    self.Trans['LSML'] = learner.transformer()
def test_deprecation(self):
  """The deprecated ``num_labeled`` parameter must raise a warning on fit."""
  # TODO: remove in v.0.5
  X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
  y = np.array([1, 0, 1, 0])
  lsml_supervised = LSML_Supervised(num_labeled=np.inf)
  # NOTE(review): the missing space ("beremoved") appears to mirror the
  # library's exact message — do not "fix" it here without checking.
  msg = ('"num_labeled" parameter is not used.'
         ' It has been deprecated in version 0.5.0 and will be'
         'removed in 0.6.0')
  assert_warns_message(DeprecationWarning, msg, lsml_supervised.fit, X, y)
def test_lsml_supervised(self):
  """The learned components must satisfy L.T @ L == Mahalanobis matrix."""
  rng = np.random.RandomState(1234)
  model = LSML_Supervised(num_constraints=200, random_state=rng)
  model.fit(self.X, self.y)
  components = model.components_
  assert_array_almost_equal(components.T.dot(components),
                            model.get_mahalanobis_matrix())
def test_lsml(self):
  """LSML_Supervised should pass the sklearn estimator contract checks."""
  estimator = LSML_Supervised()
  check_estimator(estimator)
[learner for (learner, _) in quadruplets_learners]))  # tail of the quadruplets id list (definition starts above this chunk)

# Pair-based weakly supervised learners with the builder producing their input.
pairs_learners = [(ITML(), build_pairs),
                  (MMC(max_iter=2), build_pairs),  # max_iter=2 for faster
                  (SDML(), build_pairs),
                  ]
# Human-readable ids (class names) for test parametrization.
ids_pairs_learners = list(
    map(lambda x: x.__class__.__name__,
        [learner for (learner, _) in pairs_learners]))

# Supervised metric learners exercised through a classification task.
classifiers = [(Covariance(), build_classification),
               (LFDA(), build_classification),
               (LMNN(), build_classification),
               (NCA(), build_classification),
               (RCA(), build_classification),
               (ITML_Supervised(max_iter=5), build_classification),
               (LSML_Supervised(), build_classification),
               (MMC_Supervised(max_iter=5), build_classification),
               (RCA_Supervised(num_chunks=10), build_classification),
               (SDML_Supervised(), build_classification)]
ids_classifiers = list(
    map(lambda x: x.__class__.__name__,
        [learner for (learner, _) in classifiers]))

# Metric learners exercised through a regression task.
regressors = [(MLKR(), build_regression)]
ids_regressors = list(
    map(lambda x: x.__class__.__name__,
        [learner for (learner, _) in regressors]))

# Mixin classes identifying the weakly supervised estimator families.
WeaklySupervisedClasses = (_PairsClassifierMixin,
                           _QuadrupletsClassifierMixin)

# All tuple-based learners: pairs plus quadruplets.
tuples_learners = pairs_learners + quadruplets_learners
def test_lsml_supervised(self):
  """transformer_ must satisfy L.T @ L == learned metric (legacy API)."""
  rng = np.random.RandomState(1234)
  model = LSML_Supervised(num_constraints=200)
  # Legacy API: random_state is passed to fit(), not the constructor.
  model.fit(self.X, self.y, random_state=rng)
  components = model.transformer_
  assert_array_almost_equal(components.T.dot(components), model.metric())
def __init__(self):
    """Create the LSML metric model and empty train/test data slots."""
    # Placeholders filled in later by the training/evaluation code.
    # NOTE(review): naming mixes styles (X_tr/X_te vs y_train); kept as-is
    # because external code may read these attributes.
    self.X_tr = None
    self.y_train = None
    self.X_te = None
    self.metric_model = LSML_Supervised(num_constraints=200)
    # Tail of load_split(): load the held-out split from disk.
    X_test = np.load(osp.join(args.data_root, 'feature_test.npy'))
    y_test = np.load(osp.join(args.data_root, 'label_test.npy'))
    return X_train, X_test, y_train, y_test


if __name__ == '__main__':
    # Command-line interface: data location and LSML iteration budget.
    parser = argparse.ArgumentParser("LSML")
    parser.add_argument('--data-root', default='./data/raw_split')
    parser.add_argument('--max-iter', type=int, default=1000)
    args = parser.parse_args()

    # Output folder is keyed by the iteration budget.
    name = f"{args.max_iter}"
    data_save_folder = f"./data/LSML/{name}"
    makedirs(data_save_folder)

    X_train, X_test, y_train, y_test = load_split(args)
    print(X_train.shape)

    # NOTE(review): the start time is captured but never reported — confirm
    # whether an elapsed-time print was intended.
    t = time.time()
    lsml = LSML_Supervised(max_iter=args.max_iter, verbose=1)
    lsml.fit(X_train, y_train)
    print(" # LSML fit done.")

    # Persist transformed features alongside the untouched labels.
    np.save(osp.join(data_save_folder, "feature_train.npy"),
            lsml.transform(X_train))
    np.save(osp.join(data_save_folder, "label_train.npy"), y_train)
    np.save(osp.join(data_save_folder, "feature_test.npy"),
            lsml.transform(X_test))
    np.save(osp.join(data_save_folder, "label_test.npy"), y_test)
def gettestData():
    """Load the test set (libSVM format) named on the command line."""
    # Get testing file name from the command line
    testdatafile = sys.argv[2]
    # The testing file is in libSVM format
    ts_data = load_svmlight_file(testdatafile)
    Xts = ts_data[0].toarray()  # Converts sparse matrices to dense
    Yts = ts_data[1]  # The testing labels
    return Xts, Yts


# get training data
Xtr, Ytr = gettrainData()
# get testing data
Xts, Yts = gettestData()

# Taking only a fraction of the data — the slice keeps the first half.
# (NOTE(review): an earlier comment said "1/4th", but ``// 2`` keeps 1/2.)
Xtr = Xtr[:len(Xtr) // 2]
Ytr = Ytr[:len(Ytr) // 2]

lsml = LSML_Supervised(num_constraints=1000)
# learning
lsml.fit(Xtr, Ytr)
# Get the learnt metric
M = lsml.metric()
# Metric saved
np.save("model.npy", M)