Example #1
0
def test_find_self():
    """Check that each training point finds itself, before and after pickling.

    For every forest size, each training point must come back as the first
    result of its own query, and every code returned by ``encode`` for the
    point must name a leaf whose members include the point's index.  The
    same checks are repeated on a pickle round-tripped copy of the forest.
    """

    X_train, X_test = _get_mnist_data()

    def check_self_lookup(forest):
        # get_leaf_nodes() yields (code, members) pairs; sets give fast
        # membership tests below.
        leaves = {code: set(members)
                  for code, members in forest.get_leaf_nodes()}

        for idx, point in enumerate(X_train):
            neighbours = forest.query(point, 10)[:10]
            assert neighbours[0] == idx

            for code in forest.encode(point):
                assert idx in leaves[code]

    # The precision column of the table is not used by this test.
    for no_trees, _ in ((1, 0.05),
                        (5, 0.3),
                        (10, 0.5),
                        (50, 0.9)):

        forest = RPForest(leaf_size=10, no_trees=no_trees)
        forest.fit(X_train)
        check_self_lookup(forest)

        # Same assertions must hold after serialization.
        check_self_lookup(pickle.loads(pickle.dumps(forest)))
Example #2
0
def test_find_self():
    """Verify self-lookup on a fitted forest and on its unpickled copy.

    Each training point has to be the top result of a query for itself, and
    each leaf code that ``encode`` reports for the point has to map to a
    leaf that lists the point's index.
    """

    X_train, X_test = _get_mnist_data()

    # Only the forest size matters here; no precision threshold is checked.
    for no_trees in (1, 5, 10, 50):

        forest = RPForest(leaf_size=10, no_trees=no_trees)
        forest.fit(X_train)

        # First pass: the fitted forest.  Second pass: a pickle round trip.
        for after_pickle in (False, True):
            if after_pickle:
                forest = pickle.loads(pickle.dumps(forest))

            members_by_code = {}
            for code, members in forest.get_leaf_nodes():
                members_by_code[code] = set(members)

            for row, vector in enumerate(X_train):
                top_ten = forest.query(vector, 10)[:10]
                assert top_ten[0] == row

                for code in forest.encode(vector):
                    assert row in members_by_code[code]
Example #3
0
def lvnn(fp, nt=3, k=5, iter=5, leaves=50):
    """Build an approximate k-nearest-neighbour list for the rows of ``fp``.

    An ``RPForest`` supplies the initial ``k`` neighbours of every row.  The
    lists are then refined for ``iter`` rounds: the candidates for row ``i``
    are the neighbours of its current neighbours, and the ``k`` candidates
    closest to row ``i`` (Euclidean distance) become its new list.

    Args:
        fp: array with one point per row.
        nt: number of trees, passed to ``RPForest`` as ``no_trees``.
        k: number of neighbours kept per point.
        iter: number of refinement rounds (name kept for compatibility even
            though it shadows the builtin).
        leaves: passed to ``RPForest`` as ``leaf_size``.

    Returns:
        Array of shape ``(fp.shape[0] * k, 3)`` whose rows are
        ``(point index, neighbour index, negated distance)``.  Entries that
        no refinement round has written keep their initial ``-1`` value.
    """
    n_points = fp.shape[0]

    # nn[i, j] = (neighbour index, negated distance); -1 marks "not set".
    nn = np.zeros((n_points, k, 2)) - 1

    print(' start Tree build')
    model = RPForest(leaf_size=leaves, no_trees=nt)
    model.fit(fp)
    for i in range(n_points):
        nn[i, :, 0] = model.query(fp[i, ], k)

    t = 0
    while t < iter:
        t += 1
        # Real snapshot: a plain assignment would alias ``nn`` and let rows
        # updated earlier in this round leak into later rows.
        old_nn = nn.copy()
        for i in range(n_points):
            candidates = set()
            for j in range(k):
                # ``nn`` is a float array; indices must be cast before use.
                ji = int(old_nn[i, j, 0])
                if ji < 0:
                    # Unfilled slot; -1 must not be read as "last row".
                    continue
                for m in range(k):
                    li = int(old_nn[ji, m, 0])
                    if li < 0:
                        continue
                    d = -np.linalg.norm(fp[i, :] - fp[li, :])
                    candidates.add((li, d))

            # Rank by distance (``d`` is negated, so negate it back) rather
            # than by the tuple's first element, which is the point index.
            best = nsmallest(k, candidates, key=lambda c: -c[1])
            if best:
                # Write once per point, after all candidates are gathered,
                # and tolerate fewer than ``k`` candidates.
                nn[i, :len(best), :] = np.array(best)
                nn[i, len(best):, :] = -1

    # Flatten to (point, neighbour, negated distance) triples, ordered by
    # point and then by neighbour slot.
    csr = np.zeros((n_points * k, 3))
    csr[:, 0] = np.repeat(np.arange(n_points), k)
    csr[:, 1:] = nn.reshape(-1, 2)
    return csr
Example #4
0
def test_sample_training():
    """Check precision when the forest is fitted on a sample only.

    The forest is fitted on the first quarter of the training rows, cleared,
    and then every training row is added with ``index``.  Precision is the
    mean overlap between the forest's top-10 results and the top-10 rows by
    dot product with the row-normalised training data, and must reach the
    threshold paired with each forest size.
    """

    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05),
                                         (5, 0.3),
                                         (10, 0.5),
                                         (50, 0.9)):

        tree = RPForest(leaf_size=10, no_trees=no_trees)
        # Fit on quarter of data.  Floor division keeps the slice bound an
        # integer; true division gives a float on Python 3, which is not a
        # valid slice index.
        X_sample = X_train[:X_train.shape[0] // 4]
        tree.fit(X_sample)
        # Clear and index everything
        tree.clear()
        for i, x in enumerate(X_train):
            tree.index(i, x)
        tree._X = X_train

        precision = 0.0
        # Normalise rows in place so the dot product below ranks by cosine.
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]
        for x_test in X_test:
            true_nns = np.argsort(-np.dot(X_train, x_test))[:10]
            nns = tree.query(x_test, 10)[:10]

            precision += len(set(nns) & set(true_nns)) / 10.0

        precision /= X_test.shape[0]

        assert precision >= expected_precision
Example #5
0
def test_serialization_mnist():
    """Check that a pickle round-tripped forest still meets its precision bar.

    For each forest size the fitted forest is serialized and restored, then
    queried with every test vector.  Returned indices must be smaller than
    the number of training rows, and the mean top-10 overlap with the
    dot-product ranking over row-normalised training data must reach the
    paired threshold.
    """

    X_train, X_test = _get_mnist_data()

    settings = ((1, 0.05),
                (5, 0.3),
                (10, 0.5),
                (50, 0.9))

    for no_trees, expected_precision in settings:

        forest = RPForest(leaf_size=10, no_trees=no_trees)
        forest.fit(X_train)

        # Serialize and deserialize
        forest = pickle.loads(pickle.dumps(forest))

        # In-place row normalisation of the training matrix.
        row_norms = np.linalg.norm(X_train, axis=1)
        X_train /= row_norms[:, np.newaxis]

        overlap_total = 0.0
        for query in X_test:
            exact = np.argsort(-np.dot(X_train, query))[:10]
            approx = forest.query(query, 10)[:10]
            assert (approx < X_train.shape[0]).all()

            overlap_total += len(set(approx) & set(exact)) / 10.0

        assert overlap_total / X_test.shape[0] >= expected_precision
Example #6
0
def test_sample_training():
    """Fit on a quarter of the data, index all of it, and check precision.

    After fitting on the first quarter of the training rows the forest is
    cleared and every training row is inserted through ``index``.  The mean
    top-10 overlap with the dot-product ranking over row-normalised training
    data must reach the threshold paired with each forest size.
    """

    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05), (5, 0.3), (10, 0.5),
                                         (50, 0.9)):

        tree = RPForest(leaf_size=10, no_trees=no_trees)
        # Fit on quarter of data.  ``//`` is required: ``/`` yields a float
        # on Python 3 and a float cannot be used as a slice bound.
        X_sample = X_train[:X_train.shape[0] // 4]
        tree.fit(X_sample)
        # Clear and index everything
        tree.clear()
        for i, x in enumerate(X_train):
            tree.index(i, x)
        tree._X = X_train

        precision = 0.0
        # Rows are normalised in place before computing the reference ranking.
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]
        for x_test in X_test:
            true_nns = np.argsort(-np.dot(X_train, x_test))[:10]
            nns = tree.query(x_test, 10)[:10]

            precision += len(set(nns) & set(true_nns)) / 10.0

        precision /= X_test.shape[0]

        assert precision >= expected_precision
Example #7
0
def test_serialization_mnist():
    """Ensure pickling a fitted forest does not degrade query precision.

    Each forest is fitted, pickled and unpickled, then queried with all test
    vectors.  Every returned index must be below the training row count, and
    the average share of the reference top-10 recovered must be at least
    the threshold listed next to the forest size.
    """

    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05), (5, 0.3), (10, 0.5),
                                         (50, 0.9)):

        model = RPForest(leaf_size=10, no_trees=no_trees)
        model.fit(X_train)

        # Serialize and deserialize
        payload = pickle.dumps(model)
        model = pickle.loads(payload)

        score = 0.0
        # Scale every training row in place by its norm.
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]
        n_train = X_train.shape[0]
        for probe in X_test:
            similarity = np.dot(X_train, probe)
            reference = np.argsort(-similarity)[:10]
            found = model.query(probe, 10)[:10]
            assert (found < n_train).all()

            shared = set(found) & set(reference)
            score += len(shared) / 10.0

        score /= X_test.shape[0]

        assert score >= expected_precision
Example #8
0
class RPForest(BaseANN):
    """Thin wrapper that delegates ``fit`` and ``query`` to ``rpforest.RPForest``."""

    def __init__(self, leaf_size, n_trees):
        """Create the underlying model and a descriptive ``name``.

        ``n_trees`` is forwarded to the library as its ``no_trees`` argument.
        """
        # Imported lazily and aliased so it does not shadow this class's name.
        from rpforest import RPForest as _LibraryForest
        self.name = 'RPForest(leaf_size=%d, n_trees=%d)' % (leaf_size, n_trees)
        self._model = _LibraryForest(leaf_size=leaf_size, no_trees=n_trees)

    def fit(self, X):
        """Fit the wrapped model on ``X``."""
        self._model.fit(X)

    def query(self, v, n):
        """Return the wrapped model's query result for ``v`` and ``n``."""
        result = self._model.query(v, n)
        return result
Example #9
0
class RPForest(BaseANN):
    """Wrapper around an ``rpforest.RPForest`` model with ``fit``/``query`` methods."""

    def __init__(self, leaf_size, n_trees):
        """Record a display name and build the wrapped model.

        The model receives ``leaf_size`` unchanged and ``n_trees`` as
        ``no_trees``.
        """
        # Local import under an alias: the library class has the same name
        # as this wrapper.
        from rpforest import RPForest as _Backend
        params = (leaf_size, n_trees)
        self.name = 'RPForest(leaf_size=%d, n_trees=%d)' % params
        self._model = _Backend(leaf_size=leaf_size, no_trees=n_trees)

    def fit(self, X):
        """Pass ``X`` to the wrapped model's ``fit``."""
        self._model.fit(X)

    def query(self, v, n):
        """Forward ``v`` and ``n`` to the wrapped model's ``query`` and return its result."""
        return self._model.query(v, n)
Example #10
0
    b = [350]

    for leaf_size in a:
        for no_trees in b:
            fq = open('fq_RPForest.txt', 'a')
            if X.dtype != np.double:
                X = np.array(X).astype(np.double)
            t = RPForest(leaf_size, no_trees)
            t.fit(X)
            start_query = time.time()
            accuracy = 0
            for i in range(len(xq)):
                v = xq[i]
                if v.dtype != np.double:
                    v = np.array(v).astype(np.double)
                ans = t.query(v, k)
                for x in ans:
                    if x in gt[i]:
                        accuracy += 1

            end_query = time.time()
            print(leaf_size, no_trees)
            print(round(accuracy / len(xq) / k, 4),
                  ": ",
                  round(end_query - start_query, 4),
                  ",",
                  file=fq,
                  sep="")
            fq.close()

            print('time query:', end_query - start_query)