예제 #1
0
def test_candidates_mnist():
    """Check candidate quality of RPForest for several forest sizes.

    Uses the train/test split returned by ``_get_mnist_data()``. For every
    ``(no_trees, expected_precision)`` pair a forest is fitted on the
    training rows, each test row is queried, and the mean overlap between
    the first 10 returned candidates and the 10 training rows with the
    largest dot product must be at least ``expected_precision``.
    """
    X_train, X_test = _get_mnist_data()

    forest_sizes = (1, 5, 10, 50, 80)
    precision_floors = (0.05, 0.12, 0.2, 0.5, 0.6)

    for no_trees, expected_precision in zip(forest_sizes, precision_floors):
        tree = RPForest(leaf_size=10, no_trees=no_trees)
        tree.fit(X_train)

        # Row-normalise in place only after fitting: each forest is fitted
        # on X_train as it stands before this line, and the array stays
        # modified for the following iterations. With unit-length rows the
        # dot product below ranks training rows by cosine similarity.
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]
        n_train = X_train.shape[0]

        precision = 0.0
        for x_test in X_test:
            scores = np.dot(X_train, x_test)
            true_nns = np.argsort(-scores)[:10]

            # Oversized request: used purely to sanity-check the returned
            # ids (no duplicates, no -1 entries, all within range).
            check_nns = tree.get_candidates(x_test, 100000)
            assert len(check_nns) == len(set(check_nns))
            assert -1 not in check_nns
            assert (check_nns < n_train).all()

            # The request that is actually scored.
            nns = tree.get_candidates(x_test, 10)[:10]
            assert (nns < n_train).all()

            overlap = set(nns) & set(true_nns)
            precision += len(overlap) / 10.0

        # Average the per-query precision over the whole test set.
        precision /= X_test.shape[0]

        assert precision >= expected_precision
예제 #2
0
def test_candidates_mnist():
    """Verify that RPForest precision grows with the number of trees.

    A forest is built for each tree count on the training rows from
    ``_get_mnist_data()``. Every test row is then queried, and the fraction
    of the 10 highest-dot-product training rows found among the first 10
    candidates is averaged over the test set and compared against the
    minimum precision paired with that tree count.
    """
    top_k = 10
    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in (
            (1, 0.05),
            (5, 0.12),
            (10, 0.2),
            (50, 0.5),
            (80, 0.6)):

        tree = RPForest(leaf_size=10, no_trees=no_trees)
        tree.fit(X_train)

        precision = 0.0

        # In-place scaling of every training row to unit length. This runs
        # after fit() and mutates X_train for all later iterations too.
        row_norms = np.linalg.norm(X_train, axis=1)
        X_train /= row_norms[:, np.newaxis]

        for x_test in X_test:
            # Reference answer: ids of the rows with the largest dot product.
            true_nns = np.argsort(-np.dot(X_train, x_test))[:top_k]

            # Ask for far more candidates than needed and validate the ids.
            check_nns = tree.get_candidates(x_test, 100000)
            unique_ids = set(check_nns)
            assert len(check_nns) == len(unique_ids)
            assert -1 not in check_nns
            assert (check_nns < X_train.shape[0]).all()

            # Candidates that count towards the precision score.
            nns = tree.get_candidates(x_test, top_k)[:top_k]
            assert (nns < X_train.shape[0]).all()

            found = len(set(nns) & set(true_nns))
            precision += found / float(top_k)

        precision /= X_test.shape[0]

        assert precision >= expected_precision
예제 #3
0
# Build, re-index and query an RPForest over the rows of `features`.
# Expects `leaf_size`, `n_trees`, `features`, `dict_feat`, `RPForest` and
# `Timer` to be defined elsewhere.
name = 'RPForest(leaf_size=%d, n_trees=%d)' % (leaf_size, n_trees)
model = RPForest(leaf_size=leaf_size, no_trees=n_trees)

# Fitting. A C-ordered copy of the matrix is passed in; the original author
# notes that this works around a Cython-related error.
features = features.copy(order='C')
model.fit(features)
# NOTE(review): presumably clear() drops the fitted points so that they can
# be re-added under custom ids below -- confirm against the rpforest docs.
model.clear()

# Indexing: add every row under the id stored in dict_feat, timing each call.
for i, x in enumerate(features):
    t = Timer()
    with t:
        model.index(dict_feat[i], x.tolist())

# Querying: look up candidates for every row, timing each call.
for i, x in enumerate(features):
    t = Timer()
    with t:
        results = model.get_candidates(x)
    # Single parenthesised argument: valid on both Python 2 and Python 3,
    # and prints the same space-separated text as the old print statement.
    print('queried %s results %s' % (dict_feat[i], results))




import timeit
class Timer:
    def __init__(self, timer=None, disable_gc=False, verbose=True):
        if timer is None:
            timer = timeit.default_timer
        self.timer = timer
        self.disable_gc = disable_gc
        self.verbose = verbose
        self.start = self.end = self.interval = None
    def __enter__(self):