def process_rca(self, **option):
    """Metric Learning algorithm: RCA.

    Fits supervised RCA on the training gene-expression data and caches
    the learned linear transformation matrix under ``self.Trans['RCA']``.
    ``**option`` is accepted for interface symmetry and is unused here.
    """
    expressions = self.GeneExp_train
    labels = self.Label_train
    learner = RCA_Supervised(num_chunks=30, chunk_size=2)
    learner.fit(expressions, labels)
    self.Trans['RCA'] = learner.transformer()
def stable_init(self, num_dims=None, pca_comps=None, chunk_size=2,
                preprocessor=None):
    """Replacement ``__init__`` that pins ``num_chunks=2``.

    This init makes RCA stable for scikit-learn examples: all caller
    options are forwarded unchanged, only the chunk count is fixed.
    """
    fixed_options = dict(num_chunks=2, num_dims=num_dims,
                         pca_comps=pca_comps, chunk_size=chunk_size,
                         preprocessor=preprocessor)
    RCA_Supervised.__init__(self, **fixed_options)
def test_bad_parameters(self):
    """RCA fitted with too few chunks must warn about a singular
    inner covariance matrix (NaNs in the transformation).

    Fix: ``pytest.warns(None)`` is deprecated and raises in pytest >= 7;
    record warnings with the stdlib ``warnings`` module instead, which
    yields the same ``.message`` objects the assertion inspects.
    """
    import warnings
    n = 200
    num_chunks = 3
    X, y = make_classification(random_state=42, n_samples=n,
                               n_features=6, n_informative=6,
                               n_redundant=0)
    rca = RCA_Supervised(num_chunks=num_chunks, random_state=42)
    msg = ('Due to the parameters of RCA_Supervised, '
           'the inner covariance matrix is not invertible, '
           'so the transformation matrix will contain Nan values. '
           'Increase the number or size of the chunks to correct '
           'this problem.'
           )
    with warnings.catch_warnings(record=True) as raised_warning:
        warnings.simplefilter("always")  # make sure nothing is filtered out
        rca.fit(X, y)
    assert any(str(w.message) == msg for w in raised_warning)
def test_rca_supervised(self):
    """fit + transform must equal fit_transform for identical seeds."""
    rng = np.random.RandomState(1234)
    model = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2)
    model.fit(self.X, self.y, random_state=rng)
    projected_once = model.transform()

    # Re-seed identically and do the same thing in a single call.
    rng = np.random.RandomState(1234)
    model = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2)
    projected_twice = model.fit_transform(self.X, self.y, random_state=rng)

    assert_array_almost_equal(projected_once, projected_twice)
def test_feature_null_variance(self):
    """Classes stay separable when a zero-variance column is appended,
    for both ways of specifying the PCA preprocessing step."""
    padded = np.hstack((self.iris_points,
                        np.eye(len(self.iris_points), M=1)))

    # pca_comps as a component count (3), then as a variance ratio (0.95).
    for pca_comps in (3, 0.95):
        model = RCA_Supervised(num_dims=2, pca_comps=pca_comps,
                               num_chunks=30, chunk_size=2)
        model.fit(padded, self.iris_labels)
        separation = class_separation(model.transform(padded),
                                      self.iris_labels)
        self.assertLess(separation, 0.30)
def runRCA(X_train, X_test, y_train, t_test):
    """Fit supervised RCA on the training split and project both splits.

    The projections are persisted to ``X_train_RCA.npy`` /
    ``X_test_RCA.npy`` and returned as a pair.
    NOTE(review): ``t_test`` is unused; kept for signature symmetry
    with sibling run* helpers — confirm before removing.
    """
    model = RCA_Supervised()
    model.fit(X_train, y_train)
    projected_train = model.transform(X_train)
    projected_test = model.transform(X_test)
    np.save('X_train_RCA', projected_train)
    np.save('X_test_RCA', projected_test)
    return projected_train, projected_test
def test_feature_null_variance(self):
    """Classes stay separable when a zero-variance column is appended,
    for both ways of specifying the PCA preprocessing step."""
    padded = np.hstack((self.iris_points,
                        np.eye(len(self.iris_points), M=1)))

    # pca_comps as a component count (3), then as a variance ratio (0.95).
    for pca_comps in (3, 0.95):
        model = RCA_Supervised(num_dims=2, pca_comps=pca_comps,
                               num_chunks=30, chunk_size=2)
        model.fit(padded, self.iris_labels)
        separation = class_separation(model.transform(), self.iris_labels)
        self.assertLess(separation, 0.30)
def test_unknown_labels(self):
    """Samples labelled -1 must be ignored: fitting on the labelled
    half alone and on the full set padded with -1 labels must produce
    identical, NaN-free components."""
    n = 200
    num_chunks = 50
    X, y = make_classification(random_state=42, n_samples=2 * n,
                               n_features=6, n_informative=6,
                               n_redundant=0)
    labels_with_unknown = np.concatenate((y[:n], -np.ones(n)))

    model_known = RCA_Supervised(num_chunks=num_chunks, random_state=42)
    model_known.fit(X[:n], y[:n])
    model_mixed = RCA_Supervised(num_chunks=num_chunks, random_state=42)
    model_mixed.fit(X, labels_with_unknown)

    for model in (model_known, model_mixed):
        assert not np.any(np.isnan(model.components_))
    np.testing.assert_array_equal(model_known.components_,
                                  model_mixed.components_)
def test_rca_supervised(self):
    """fit + transform must equal fit_transform for identical seeds."""
    rng = np.random.RandomState(1234)
    model = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2)
    model.fit(self.X, self.y, random_state=rng)
    projected_once = model.transform(self.X)

    # Re-seed identically and do the same thing in a single call.
    rng = np.random.RandomState(1234)
    model = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2)
    projected_twice = model.fit_transform(self.X, self.y, random_state=rng)

    assert_array_almost_equal(projected_once, projected_twice)
def get_dist_func(
        data: Array[np.float64],
        target: Array[np.float64]
) -> Callable[[Callable[[np.float64, np.float64], np.float64], int, int],
              np.float64]:
    """
    Get function that returns distances between examples in learned space.

    Fix: the annotations used ``np.int``, which was deprecated in
    NumPy 1.20 and removed in 1.24, so evaluating this ``def`` fails on
    current NumPy; the documented replacement is the builtin ``int``.

    Args:
        data : Array[np.float64] - training data
        target : Array[np.float64] - target variable values (classes of
            training examples)

    Returns:
        Callable[[Callable[[np.float64, np.float64], np.float64], int, int],
        np.float64] -- higher order function that takes a metric function
        and returns a function that takes two indices of examples and
        returns the distance between those examples in the learned metric
        space.
    """
    # Standardize the features, then project them with supervised RCA.
    data_trans: Array[np.float64] = RCA_Supervised().fit_transform(
        StandardScaler().fit_transform(data), target)

    def dist_func_res(metric: Callable[[np.float64, np.float64], np.float64],
                      i1: int, i2: int) -> np.float64:
        """
        Distance function that takes indices of examples in the training
        set and returns their distance in the learned space using the
        specified distance metric.

        Args:
            metric : callable - distance metric over two feature vectors
            i1 : int - index of first training example
            i2 : int - index of second training example

        Returns:
            np.float64 - distance in learned metric space using specified
            metric between specified training examples.
        """
        return metric(data_trans[i1, :], data_trans[i2, :])

    return dist_func_res  # Return distance function.
# print('ssssssss', TrainData) x = nca.fit(FSTrainData, TrainLabels) TFSTestData = x.transform(FSTestData) print('Transformation Done', '\n') elif Method == 'SDML': print("Method: SDML", '\n') sdml = SDML_Supervised(num_constraints=200) x = sdml.fit(FSTrainData, TrainLabels) TFSTestData = x.transform(FSTestData) print('Transformation Done', '\n') elif Method == 'RCA': print("Method: RCA", '\n') rca = RCA_Supervised(num_chunks=2, chunk_size=1) x = rca.fit(FSTrainData, TrainLabels) TFSTestData = x.transform(FSTestData) print('Transformation Done', '\n') #print(len(TTestData)) #print(TTestData[0]) #rca = RCA_Supervised(num_chunks=2, chunk_size=1) #x= rca.fit(TrainData, targets) #TTestData = x.transform(TestData) #transformer = x.transformer() #print(TTestData)
# Pair-based learners, each paired with the fixture builder that
# generates suitable (pairs, labels) input for it.
pairs_learners = [(ITML(), build_pairs),
                  (MMC(max_iter=2), build_pairs),  # max_iter=2 for faster
                  (SDML(), build_pairs),
                  ]
# Human-readable test ids: the class name of each learner.
ids_pairs_learners = list(map(lambda x: x.__class__.__name__,
                              [learner for (learner, _) in pairs_learners]))

# Plain supervised metric learners driven by a classification fixture.
classifiers = [(Covariance(), build_classification),
               (LFDA(), build_classification),
               (LMNN(), build_classification),
               (NCA(), build_classification),
               (RCA(), build_classification),
               (ITML_Supervised(max_iter=5), build_classification),
               (LSML_Supervised(), build_classification),
               (MMC_Supervised(max_iter=5), build_classification),
               (RCA_Supervised(num_chunks=10), build_classification),
               (SDML_Supervised(), build_classification)]
ids_classifiers = list(map(lambda x: x.__class__.__name__,
                           [learner for (learner, _) in classifiers]))

# Regression-style learners (MLKR) with a regression fixture.
regressors = [(MLKR(), build_regression)]
ids_regressors = list(map(lambda x: x.__class__.__name__,
                          [learner for (learner, _) in regressors]))

# Mixins that mark weakly-supervised (tuple-input) estimators.
WeaklySupervisedClasses = (_PairsClassifierMixin,
                           _QuadrupletsClassifierMixin)

# All tuple-input learners = pairs + quadruplets
# (quadruplets_learners is defined elsewhere in this file).
tuples_learners = pairs_learners + quadruplets_learners
ids_tuples_learners = ids_pairs_learners + ids_quadruplets_learners
def test_rca_supervised(self):
    """The learned metric must factor as L^T L for transformer L."""
    rng = np.random.RandomState(1234)
    model = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2)
    model.fit(self.X, self.y, random_state=rng)
    transformer = model.transformer_
    assert_array_almost_equal(transformer.T.dot(transformer),
                              model.metric())
def test_rca_supervised(self):
    """The Mahalanobis matrix must factor as L^T L for components L."""
    rng = np.random.RandomState(1234)
    model = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2)
    model.fit(self.X, self.y, random_state=rng)
    components = model.components_
    assert_array_almost_equal(components.T.dot(components),
                              model.get_mahalanobis_matrix())
def test_iris(self):
    """RCA projection of iris must keep the classes well separated."""
    model = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2)
    model.fit(self.iris_points, self.iris_labels)
    separation = class_separation(model.transform(self.iris_points),
                                  self.iris_labels)
    self.assertLess(separation, 0.25)
def test_iris(self):
    """RCA projection of iris must keep the classes well separated."""
    model = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2)
    model.fit(self.iris_points, self.iris_labels)
    separation = class_separation(model.transform(), self.iris_labels)
    self.assertLess(separation, 0.25)
def test_rca_supervised(self):
    """The Mahalanobis matrix must factor as L^T L for components L."""
    model = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2)
    model.fit(self.X, self.y)
    components = model.components_
    assert_array_almost_equal(components.T.dot(components),
                              model.get_mahalanobis_matrix())
def test_rca_supervised(self):
    """The Mahalanobis matrix must factor as L^T L for transformer L."""
    rng = np.random.RandomState(1234)
    model = RCA_Supervised(num_dims=2, num_chunks=30, chunk_size=2)
    model.fit(self.X, self.y, random_state=rng)
    transformer = model.transformer_
    assert_array_almost_equal(transformer.T.dot(transformer),
                              model.get_mahalanobis_matrix())