Ejemplo n.º 1
0
class Test_PyFLANN_clustering(unittest.TestCase):
    """Tests for ``FLANN.kmeans`` on randomly generated data."""

    # Centre-initialisation modes exercised by the random clustering helper,
    # in the order they are run.
    _CENTERS_INIT_MODES = ('random', 'gonzales', 'kmeanspp')

    def setUp(self):
        """Create a fresh FLANN instance for each test."""
        self.nn = FLANN(iterations=11)

    ##########################################################################

    def test_Rand(self):
        """kmeans returns exactly as many centroids as were requested."""
        x = np.random.rand(100, 10000)
        nK = 10
        centroids = self.nn.kmeans(x, nK)
        # assertEqual shows both values on failure; assertTrue(a == b) only
        # reports "False is not true".
        self.assertEqual(len(centroids), nK)

    def test2d_small(self):
        """2 points in 2 dimensions, no duplicates."""
        self.__nd_random_clustering_test(2, 2)

    def test3d_small(self):
        """3 points in 3 dimensions, no duplicates."""
        self.__nd_random_clustering_test(3, 3)

    def test4d_small(self):
        """4 points in 4 dimensions, no duplicates."""
        self.__nd_random_clustering_test(4, 4)

    def test3d_large(self):
        """3 points in 3 dimensions, each repeated 1000 times with jitter."""
        self.__nd_random_clustering_test(3, 3, 1000)

    def test10d_large(self):
        """2 points in 10 dimensions, each repeated 10 times with jitter."""
        self.__nd_random_clustering_test(10, 2, 10)

    def test500d(self):
        """2 points in 500 dimensions, each repeated 10 times with jitter."""
        self.__nd_random_clustering_test(500, 2, 10)

    @staticmethod
    def _min_sq_dists(points, centroids):
        """Return each point's squared distance to its nearest centroid.

        ``points`` is an (n_points, dim) array and ``centroids`` is indexed as
        an (n_centroids, dim) array; the result has one entry per point.
        """
        # np.asarray in case kmeans hands back a non-ndarray sequence.
        centers = np.asarray(centroids)
        # Broadcast to (n_centroids, n_points, dim), then reduce over dim and
        # take the minimum over centroids.
        diffs = centers[:, np.newaxis, :] - points[np.newaxis, :, :]
        return (diffs ** 2).sum(axis=2).min(axis=0)

    def __nd_random_clustering_test(self, dim, N, dup=1):
        """
        Cluster ``dup`` copies of ``N`` random ``dim``-dimensional points
        into ``N`` clusters and check that every original point lies
        (almost) on a centroid.

        When ``dup > 1`` the copies are perturbed by tiny Gaussian noise so
        they are not exact duplicates.  The check is repeated for each
        centre-initialisation mode in ``_CENTERS_INIT_MODES``.
        """
        np.random.seed(0)
        x = np.random.rand(N, dim)
        # Stack `dup` copies of x row-wise.  np.tile returns a new array, so
        # the in-place jitter below never modifies x itself.
        xc = np.tile(x, (dup, 1))

        if dup > 1:
            xc += np.random.randn(xc.shape[0], xc.shape[1]) * 0.000001 / dim

        for centers_init in self._CENTERS_INIT_MODES:
            # subTest labels a failure with the initialisation mode that
            # produced it.
            with self.subTest(centers_init=centers_init):
                centroids = self.nn.kmeans(
                    xc[np.random.permutation(len(xc))],
                    N,
                    centers_init=centers_init,
                    random_seed=2,
                )
                for m in self._min_sq_dists(x, centroids):
                    self.assertAlmostEqual(m, 0.0, 1)

    # Skipped through unittest so the test is reported as skipped under any
    # runner and does not require pytest to be installed.
    @unittest.skip('broken, but not worth fixing')
    def testrandomnumber_same(self):
        """Two kmeans runs with the same ``random_seed`` give equal centroids."""
        data = np.random.rand(1000, 2)  # Random, so we can get a lot of local minima

        rnseed = int(time.time())
        cl1 = self.nn.kmeans(data, 50, random_seed=rnseed)
        cl2 = self.nn.kmeans(data, 50, random_seed=rnseed)

        self.assertTrue(np.all(cl1 == cl2))

    def testrandnumber_different(self):
        """A seeded run and an unseeded run give different centroids."""
        data = np.random.rand(1000, 100)  # Random, so we can get a lot of local minima

        rnseed = int(time.time())
        cl1 = self.nn.kmeans(data, 50, random_seed=rnseed)
        cl2 = self.nn.kmeans(data, 50)

        self.assertTrue(np.any(cl1 != cl2))
Ejemplo n.º 2
0
    """
    return _VmB('VmRSS:') - since


def stacksize(since=0.0):
    """Return stack size in bytes, relative to a baseline.

    The current value is read through ``_VmB('VmStk:')`` and ``since`` is
    subtracted from it, so passing an earlier reading yields the change
    since that reading.  With the default ``since=0.0`` the reading is
    returned as-is.
    """
    current = _VmB('VmStk:')
    return current - since


if __name__ == '__main__':
    # Endless profiling loop: each pass prints the Python heap summary next
    # to the process memory figure, then builds and discards a FLANN k-means
    # run so that any growth between passes can be observed.
    print('Profiling Memory usage for pyflann; CTRL-C to stop.')
    print('Increasing total process memory, relative to the python memory, ')
    print('implies a memory leak in the external libs.')
    print('Increasing python memory implies a memory leak in the python code.')

    h = hpy()

    while True:
        s = str(h.heap())
        # Only the first line of the heap report is shown.  partition()
        # returns the whole string when there is no newline, whereas slicing
        # with find() (which returns -1) would drop the last character.
        heap_summary = s.partition('\n')[0]

        print('Python: %s;    Process Total: %s' % (heap_summary, memory()))

        X = rand(30000, 2)
        pf = FLANN()
        cl = pf.kmeans(X, 20)
        # Drop every reference and force a collection before the next
        # measurement.
        del X
        del cl
        del pf
        gc.collect()