# Example 1
 def test_iris(self):
     """RCA fitted on iris chunks keeps class separation below 0.25."""
     learner = RCA(dim=2)
     # Sample 30 same-class chunks of size 2 with a fixed seed for
     # reproducibility.
     constraint_chunks = RCA.prepare_constraints(self.iris_labels,
                                                 num_chunks=30,
                                                 chunk_size=2,
                                                 seed=1234)
     learner.fit(self.iris_points, constraint_chunks)
     separation = class_separation(learner.transform(), self.iris_labels)
     self.assertLess(separation, 0.25)
# Example 2
    def test_rank_deficient_returns_warning(self):
        """Checks that if the covariance matrix is not invertible, we raise a
    warning message advising to use PCA"""
        # Local import: the file-level import block is outside this chunk.
        import warnings

        X, y = load_iris(return_X_y=True)
        # we make the fourth column a linear combination of the two first,
        # so that the covariance matrix will not be invertible:
        X[:, 3] = X[:, 0] + 3 * X[:, 1]
        rca = RCA()
        msg = ('The inner covariance matrix is not invertible, '
               'so the transformation matrix may contain Nan values. '
               'You should remove any linearly dependent features and/or '
               'reduce the dimensionality of your input, '
               'for instance using `sklearn.decomposition.PCA` as a '
               'preprocessing step.')

        # `pytest.warns(None)` is deprecated since pytest 7 and now raises;
        # record warnings with the stdlib instead (same semantics: collect
        # everything emitted during fit, then look for the exact message).
        with warnings.catch_warnings(record=True) as raised_warnings:
            warnings.simplefilter('always')
            rca.fit(X, y)
        assert any(str(w.message) == msg for w in raised_warnings)
    def fit(self, X, y=None, ml=None, cl=None):
        """Cluster ``X`` with KMeans, optionally preceded by an RCA transform.

        If must-link constraints ``ml`` are given, chunks are built from the
        constraint neighborhoods, an RCA metric is learned on them, and KMeans
        runs in the transformed space; otherwise KMeans runs on raw ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input samples.
        y : ignored
            Present for scikit-learn API compatibility.
        ml : list of (i, j) pairs or None
            Must-link constraints (indices into ``X``).
        cl : list of (i, j) pairs or None
            Cannot-link constraints (indices into ``X``).

        Returns
        -------
        self
        """
        # Never use mutable default arguments: an empty list default would be
        # shared across calls. None is behavior-identical here (both falsy).
        ml = [] if ml is None else ml
        cl = [] if cl is None else cl

        X_transformed = X

        if ml:
            # chunks[i] = neighborhood index of sample i, or -1 if unchunked.
            chunks = np.full(X.shape[0], -1)
            ml_graph, cl_graph, neighborhoods = preprocess_constraints(
                ml, cl, X.shape[0])
            for i, neighborhood in enumerate(neighborhoods):
                chunks[neighborhood] = i

            rca = RCA()
            rca.fit(X, chunks=chunks)
            X_transformed = rca.transform(X)

        kmeans = KMeans(n_clusters=self.n_clusters, max_iter=self.max_iter)
        kmeans.fit(X_transformed)

        self.labels_ = kmeans.labels_

        return self
# Example 4
# Parametrization fixtures: (estimator, dataset-builder) pairs plus the
# matching human-readable ids (the estimator class names) for pytest.
ids_quadruplets_learners = [type(learner).__name__
                            for learner, _ in quadruplets_learners]

pairs_learners = [
    (ITML(), build_pairs),
    (MMC(max_iter=2), build_pairs),  # max_iter=2 for faster
    (SDML(), build_pairs),
]
ids_pairs_learners = [type(learner).__name__
                      for learner, _ in pairs_learners]

classifiers = [(Covariance(), build_classification),
               (LFDA(), build_classification), (LMNN(), build_classification),
               (NCA(), build_classification), (RCA(), build_classification),
               (ITML_Supervised(max_iter=5), build_classification),
               (LSML_Supervised(), build_classification),
               (MMC_Supervised(max_iter=5), build_classification),
               (RCA_Supervised(num_chunks=10), build_classification),
               (SDML_Supervised(), build_classification)]
ids_classifiers = [type(learner).__name__ for learner, _ in classifiers]

regressors = [(MLKR(), build_regression)]
ids_regressors = [type(learner).__name__ for learner, _ in regressors]

WeaklySupervisedClasses = (_PairsClassifierMixin, _QuadrupletsClassifierMixin)
def compute_graph(current_graph=None):
    """Build or update the image graph from the t-SNE embedding.

    With no ``current_graph`` (or an empty one) the embedding is computed
    from scratch, clustered, and a fresh graph is returned.  Otherwise the
    user-moved node positions are merged into the embedding, an RCA metric
    is optionally re-learned from chunks around the moved nodes, and the
    graph is rebuilt from the updated embedding.

    Parameters
    ----------
    current_graph : dict or None
        Graph state from the front end; must contain a ``'nodes'`` entry
        when non-empty.  ``None`` (the default) behaves like an empty graph.

    Returns
    -------
    graph : the (module-global) graph structure produced by ``create_graph``.
    """
    global image_names, labels, n_clusters
    global graph, position_constraints, prev_embedding
    global d, features
    # BUG FIX: `fts_reduced` is read below (RCA input) and also reassigned;
    # without this declaration the assignment makes it local and the read
    # raises UnboundLocalError.
    global fts_reduced

    # Avoid a mutable default argument; None behaves like the old empty list.
    if current_graph is None:
        current_graph = []

    if len(current_graph) == 0 or prev_embedding is None:
        print('Initialise graph...')
        tic = time()
        compute_embedding()  # initialise prev_embedding with standard tsne

        # find clusters
        clusters = cluster_embedding(prev_embedding,
                                     n_clusters=n_clusters,
                                     seed=seed)

        graph = create_graph(image_names,
                             prev_embedding,
                             label=clusters,
                             labels=labels)
        toc = time()
        print('Done. ({:2.0f}min {:2.1f}s)'.format((toc - tic) / 60,
                                                   (toc - tic) % 60))
        print('Embedding range: x [{}, {}], y [{}, {}]'.format(
            prev_embedding[0].min(), prev_embedding[0].max(),
            prev_embedding[1].min(), prev_embedding[1].max()))
        return graph

    print('Update graph...')
    tic = time()

    graph = format_graph(current_graph['nodes'])

    # get current embedding
    current_embedding = prev_embedding.copy()
    moved = get_moved(
        margin=2.0)  # nodes which have moved further than the given margin

    if len(moved) > 0:
        # Overwrite the embedding coordinates of user-moved nodes with
        # their new on-screen positions.
        pos_moved = np.array([[graph[idx]['x'], graph[idx]['y']]
                              for idx in moved])
        current_embedding[moved] = pos_moved

    # find clusters
    clusters = cluster_embedding(current_embedding,
                                 n_clusters=n_clusters,
                                 seed=seed)

    if len(moved) > 0:
        # sample chunks from clusters
        chunk_size = int(d / 4.99)  # minimal chunk size

        chunks = make_chunks(current_embedding,
                             moved,
                             clusters,
                             chunk_size,
                             n_neighbors=5)
        # Re-learn the metric from the chunks; only adopt the transformed
        # features when they are all finite (RCA can produce NaN/inf when
        # its inner covariance matrix is not invertible).
        fts_reduced_rca = RCA().fit_transform(fts_reduced, chunks)
        if np.isfinite(fts_reduced_rca).all():
            fts_reduced = fts_reduced_rca
        else:
            # Message fix: a space was missing between the two sentences
            # of this implicitly-concatenated string.
            warnings.warn(
                'RCA features included infinite value or nan, so features are not updated. '
                'Try to group more samples or reduce cluster size.',
                RuntimeWarning)

    compute_embedding()  # update prev_embedding
    graph = create_graph(image_names,
                         prev_embedding,
                         label=clusters,
                         labels=labels)

    print('Embedding range: x [{}, {}], y [{}, {}]'.format(
        prev_embedding[0].min(), prev_embedding[0].max(),
        prev_embedding[1].min(), prev_embedding[1].max()))

    toc = time()
    print('Done. ({:2.0f}min {:2.1f}s)'.format((toc - tic) / 60,
                                               (toc - tic) % 60))

    return graph