Beispiel #1
0
    def test_levels(self):
        """Check that searching works on an index built with a small matrix_size.

        The first ten rows of the indexed matrix are used as queries, and the
        single result returned for each query is expected to be the payload
        stored at that row's position.
        """
        n_rows, n_cols = 1000, 20000

        # Random 0/1 matrix (each entry is 1 with probability 0.01), kept in
        # CSR form.
        features = csr_matrix(
            np.random.binomial(1, 0.01, size=(n_rows, n_cols)))

        # Payload handed to the index: simply each row's own position.
        data_to_return = np.arange(n_rows, dtype=int)

        # A small matrix_size forces the index to have multiple levels.
        cluster_index = ci.ClusterIndex(
            features, data_to_return, matrix_size=10)

        queries = features[:10]
        ret = cluster_index.search(
            queries, k=1, k_clusters=1, return_distance=False)

        # One single-element result list per query row.
        expected = [[x] for x in data_to_return[:10]]
        self.assertEqual(expected, ret)
Beispiel #2
0
    def test_dense_matrix(self):
        """Smoke-test index construction and search with a dense feature array.

        Every document is used as its own query; the single result returned
        for each query is expected to be that same document string.
        """
        data = [
            'hello world',
            'oh hello there',
            'Play it',
            'Play it again Sam',
        ]

        # One {token: 1} mapping per document, vectorized and then converted
        # from the vectorizer's output to a plain dense array.
        token_dicts = [{token: 1 for token in text.split()} for text in data]
        features = DictVectorizer().fit_transform(token_dicts).toarray()

        cluster_index = ci.ClusterIndex(features, data, DenseCosineDistance)

        ret = cluster_index.search(
            features, k=1, k_clusters=1, return_distance=False)

        expected = [[d] for d in data]
        self.assertEqual(expected, ret)