def test_iris(self):
    """Fit a 2-dimensional RCA on chunked iris data and check class separation.

    Chunks are drawn from the iris labels (30 chunks of size 2, fixed seed),
    the model is fitted on the iris points, and the class separation of the
    transformed data must be below 0.25.
    """
    model = RCA(dim=2)
    constraint_chunks = RCA.prepare_constraints(
        self.iris_labels,
        num_chunks=30,
        chunk_size=2,
        seed=1234,
    )
    model.fit(self.iris_points, constraint_chunks)

    # transform() is called without arguments; presumably it returns the
    # transformed training data -- confirm against the RCA API in use.
    embedded = model.transform()
    separation = class_separation(embedded, self.iris_labels)
    self.assertLess(separation, 0.25)
def test_iris(self):
    """Check that RCA (dim=2) keeps the iris classes well separated.

    The test passes when ``class_separation`` of the transformed data,
    measured against the iris labels, is strictly less than 0.25.
    """
    labels = self.iris_labels
    points = self.iris_points

    learner = RCA(dim=2)
    # Fixed seed keeps the sampled chunks reproducible between runs.
    sampled_chunks = RCA.prepare_constraints(
        labels, num_chunks=30, chunk_size=2, seed=1234
    )
    learner.fit(points, sampled_chunks)

    # NOTE(review): transform() with no argument is assumed to transform the
    # data the model was fitted on -- verify for the installed RCA version.
    self.assertLess(class_separation(learner.transform(), labels), 0.25)
class RCA:
    """Wrapper around an ``RCA_ml`` metric model with centroid-based scoring.

    The wrapper remembers the training data passed to :meth:`fit` so that
    :meth:`predict_proba` can later build class centroids from it.
    """

    def __init__(self):
        self.metric_model = RCA_ml()
        # Training data and labels, stored untouched by ``fit``.
        self.X_tr = None
        self.y_train = None
        # Not used by any method of this class.
        self.X_te = None

    def fit(self, X_tr, y_train):
        """Fit the underlying metric model and store the training data.

        Returns whatever ``self.metric_model.fit`` returns.
        """
        self.X_tr = X_tr
        self.y_train = y_train
        return self.metric_model.fit(X_tr, y_train)

    def transform(self, X):
        """Return ``X`` transformed by the underlying metric model."""
        return self.metric_model.transform(X)

    def predict_proba(self, X_te):
        """Score each row of ``X_te`` against the per-class centroids.

        Centroids are computed with ``NearestCentroid`` on the transformed
        training data. Returns an array with one row per test sample and one
        column per centroid, filled by ``sk_nearest_neighbour_proba``.

        NOTE(review): ``X_te`` is compared to the centroids as given; it is
        not passed through ``transform`` here. Confirm that callers
        transform it beforehand.
        """
        n_samples = X_te.shape[0]

        # Keep the transformed copy local. Writing it back to ``self.X_tr``
        # would make every further call transform the data once more and
        # silently change the centroids.
        transformed_train = self.transform(self.X_tr)

        clf = NearestCentroid()
        clf.fit(transformed_train, self.y_train)
        centroids = clf.centroids_

        probabilities = np.zeros((n_samples, centroids.shape[0]))
        # ``range`` instead of ``xrange`` so the loop runs on Python 3 too.
        for sample in range(n_samples):
            probabilities[sample] = sk_nearest_neighbour_proba(
                centroids, X_te[sample, :])
        return probabilities
def test_rank_deficient_returns_warning(self):
    """Checks that if the covariance matrix is not invertible, we raise a
    warning message advising to use PCA"""
    X, y = load_iris(return_X_y=True)
    # we make the fourth column a linear combination of the two first,
    # so that the covariance matrix will not be invertible:
    X[:, 3] = X[:, 0] + 3 * X[:, 1]
    rca = RCA()
    msg = ('The inner covariance matrix is not invertible, '
           'so the transformation matrix may contain Nan values. '
           'You should remove any linearly dependent features and/or '
           'reduce the dimensionality of your input, '
           'for instance using `sklearn.decomposition.PCA` as a '
           'preprocessing step.')
    # `pytest.warns(None)` was deprecated in pytest 7 and is an error in
    # pytest 8. `Warning` is the base category, so every emitted warning is
    # still recorded and the exact-message check below decides the outcome.
    with pytest.warns(Warning) as raised_warnings:
        rca.fit(X, y)
    assert any(str(w.message) == msg for w in raised_warnings)
def fit(self, X, y=None, ml=None, cl=None):
    """Cluster ``X`` with KMeans, optionally after an RCA transformation.

    When must-link constraints are given, they are turned into chunk
    assignments via ``preprocess_constraints``. An ``RCA`` model is fitted on
    those chunks and KMeans runs on the transformed data. Without must-link
    constraints KMeans runs on ``X`` directly.

    Parameters
    ----------
    X : array with one row per sample (``X.shape[0]`` is the sample count).
    y : ignored by this method.
    ml : must-link constraints forwarded to ``preprocess_constraints``;
        ``None`` (the default) means no constraints.
    cl : cannot-link constraints forwarded to ``preprocess_constraints``;
        ``None`` (the default) means no constraints.

    Returns
    -------
    self, with ``labels_`` set to the KMeans cluster labels.
    """
    # ``None`` sentinels replace the former mutable ``[]`` defaults, which
    # were shared between calls.
    ml = [] if ml is None else ml
    cl = [] if cl is None else cl

    X_transformed = X
    if ml:
        # -1 marks samples that belong to no neighborhood.
        chunks = np.full(X.shape[0], -1)
        _, _, neighborhoods = preprocess_constraints(ml, cl, X.shape[0])
        for i, neighborhood in enumerate(neighborhoods):
            chunks[neighborhood] = i

        rca = RCA()
        rca.fit(X, chunks=chunks)
        X_transformed = rca.transform(X)

    kmeans = KMeans(n_clusters=self.n_clusters, max_iter=self.max_iter)
    kmeans.fit(X_transformed)
    self.labels_ = kmeans.labels_
    return self