Esempio n. 1
0
class Matcher:
    """Nearest-neighbour descriptor matcher with optional ratio / mutual checks.

    The configuration dict ``cfg`` must provide:

    * ``'mutual_best'`` -- keep a match only if it is also the best match in
      the reverse direction.
    * ``'ratio_test'`` -- enable the nearest / second-nearest distance ratio
      test.
    * ``'ratio'`` -- threshold used by the ratio test.
    * ``'cuda'`` -- use the ``find_nearest_point_idx`` /
      ``find_first_and_second_nearest_point`` helpers instead of FLANN's
      linear search.
    """

    def __init__(self, cfg):
        self.mutual_best = cfg['mutual_best']
        self.ratio_test = cfg['ratio_test']
        self.ratio = cfg['ratio']
        self.use_cuda = cfg['cuda']
        self.flann = FLANN()
        # Both back ends are called as backend(reference, query), hence the
        # swapped argument order inside the lambdas.
        if self.use_cuda:
            self.match_fn_1 = lambda desc0, desc1: find_nearest_point_idx(desc1, desc0)
            self.match_fn_2 = lambda desc0, desc1: find_first_and_second_nearest_point(desc1, desc0)
        else:
            self.match_fn_1 = lambda desc0, desc1: self.flann.nn(desc1, desc0, 1, algorithm='linear')
            self.match_fn_2 = lambda desc0, desc1: self.flann.nn(desc1, desc0, 2, algorithm='linear')

    def match(self, desc0, desc1, *args, **kwargs):
        """Match every descriptor in ``desc0`` to its nearest one in ``desc1``.

        :param desc0: query descriptors, one per row
        :param desc1: reference descriptors, one per row
        :return: integer array with one ``[index into desc0, index into desc1]``
            row per match that survived the enabled tests
        """
        num_query = desc0.shape[0]
        # ``np.bool`` was removed from NumPy; the builtin is the same dtype.
        mask = np.ones(num_query, dtype=bool)
        if self.ratio_test:
            idxs, dists = self.match_fn_2(desc0, desc1)

            dists = np.sqrt(dists)  # note the distance is squared
            # Multiplicative form of d0 / d1 < ratio: same decision, but no
            # division by zero when the second neighbour is at distance 0.
            ratio_mask = dists[:, 0] < self.ratio * dists[:, 1]
            mask &= ratio_mask
            idxs = idxs[:, 0]
        else:
            idxs, _ = self.match_fn_1(desc0, desc1)

        if self.mutual_best:
            # Reverse search: for each desc1 row, its best desc0 row.
            idxs_mutual, _ = self.match_fn_1(desc1, desc0)
            mutual_mask = np.arange(num_query) == idxs_mutual[idxs]
            mask &= mutual_mask

        matches = np.concatenate(
            [np.arange(num_query)[:, None], idxs[:, None]], axis=1)
        return matches[mask]
Esempio n. 2
0
def assign_nearest_jobs(agent_idle, agent_job, agent_pos, blocked, jobs, left_jobs, n):
    """Expand a state by giving each agent one of its nearest open jobs.

    Every agent starts from the first element of its last assigned job or,
    when it has no assignment, from its own position.  FLANN looks up the
    (at most ``n``) open jobs closest to that start, and one child state is
    built per (agent, candidate job) pair via ``comp2state``.

    :param agent_idle: passed through unchanged to ``comp2state``
    :param agent_job: per-agent tuples of job indices (copied, not mutated)
    :param agent_pos: per-agent positions
    :param blocked: passed through unchanged to ``comp2state``
    :param jobs: all jobs; ``job[0]`` is used as the job's location
    :param left_jobs: the jobs that are still open
    :param n: maximum number of candidate jobs per agent
    :return: list of child states
    """
    from pyflann import FLANN

    # Locations of the open jobs and their positions within ``jobs``.
    ends = [open_job[0] for open_job in left_jobs]
    ends_job = [jobs.index(open_job) for open_job in left_jobs]

    # Where each agent will be once it has finished what it is assigned.
    starts = [
        jobs[agent_job[agent][-1]][0] if agent_job[agent] else agent_pos[agent]
        for agent in range(len(agent_pos))
    ]

    result, dists = FLANN().nn(
        np.array(ends, dtype=float),
        np.array(starts, dtype=float),
        min(n, len(ends)),
        algorithm="kmeans",
        branching=32,
        iterations=7,
        checks=16)
    assert len(agent_pos) == len(result), "Not the right amount of results"

    # A single neighbour per query comes back as a flat array; give it one
    # column so every row can be iterated.
    if agent_pos and len(result.shape) == 1:
        result = np.array([[x] for x in result])

    children = []
    for agent in range(len(agent_pos)):
        for candidate in result[agent]:
            agent_job_new = agent_job.copy()
            agent_job_new[agent] += (ends_job[candidate],)
            children.append(
                comp2state(tuple(agent_job_new), agent_idle, blocked))
    return children
Esempio n. 3
0
def get_closest(possible_starts, free_tasks_starts, grid, n):
    """Find the task start / possible start pair joined by the shortest path.

    FLANN returns, for every entry of ``free_tasks_starts`` (the queries),
    its ``n`` nearest entries of ``possible_starts`` (the indexed points).
    The ``n`` closest pairs overall are then checked with ``path`` and the
    pair with the shortest real path wins (first one on ties).

    :param possible_starts: points that are indexed by FLANN
    :param free_tasks_starts: points that are queried against the index
    :param grid: forwarded to ``path``
    :param n: number of neighbours per query and number of pairs to evaluate
    :return: ``(i_free_tasks_start, i_possible_starts, path)``
    :raises ValueError: if none of the evaluated pairs is connected by a path
    """
    flann = FLANN()
    result, dists = flann.nn(possible_starts,
                             free_tasks_starts,
                             n,
                             algorithm="kmeans",
                             branching=32,
                             iterations=7,
                             checks=16)
    # pyflann returns flat arrays for a single neighbour; normalise to
    # (number of queries, n) so the 2-D indexing below always works.
    result = np.asarray(result).reshape(-1, n)
    dists = np.array(dists, dtype=float).reshape(-1, n)

    best = None  # (path length, i_free_tasks_start, i_possible_starts, path)
    for _attempt in range(min(n, dists.size)):
        # Rows of ``dists`` are queries, so its own shape must be used here.
        nearest = np.unravel_index(np.argmin(dists), dists.shape)
        # Exclude this pair from later iterations (inf also works when all
        # distances are zero, unlike a multiple of the maximum).
        dists[nearest] = np.inf

        i_possible_starts = result[nearest]
        i_free_tasks_start = nearest[0]
        p, _ = path(tuple(possible_starts[i_possible_starts]),
                    tuple(free_tasks_starts[i_free_tasks_start]), grid,
                    [])
        # Track the winner together with its indices so a failed path search
        # can never shift the bookkeeping.
        if p and (best is None or len(p) < best[0]):
            best = (len(p), i_free_tasks_start, i_possible_starts, p)

    if best is None:
        raise ValueError("no path found between any of the nearest pairs")
    return best[1], best[2], best[3]
class NearestFilter:
    """Select the ``k`` labelled points closest to a single query."""

    def __init__(self, k: int):
        self.k = k
        self.flann = FLANN()

    def filter(self, v, points, labels):
        """Return the points and labels of the nearest neighbours of ``v``.

        ``v`` must hold exactly one query: the index array returned by FLANN
        is unpacked as a one-element sequence.  At most ``len(points)``
        neighbours are requested.
        """
        neighbour_count = min(self.k, len(points))
        query = v.astype('float32')
        index_rows, _ = self.flann.nn(points,
                                      query,
                                      num_neighbors=neighbour_count,
                                      algorithm='linear')
        (neighbours_i,) = index_rows
        return points[neighbours_i], labels[neighbours_i]
def create_affinity(X,
                    knn,
                    scale=None,
                    alg="annoy",
                    savepath=None,
                    W_path=None):
    """Build (or load) a symmetric sparse k-nearest-neighbour affinity matrix.

    :param X: 2-D data array, one sample per row
    :param knn: number of neighbours requested per sample; the first one is
        dropped (presumably the sample itself), so ``knn - 1`` are kept
    :param scale: ``None`` for binary weights, ``True`` for a Gaussian kernel
        whose width is the median neighbour distance, or a number used
        directly as the kernel width
    :param alg: ``"flann"`` uses pyflann's kd-tree search; every other value
        (including the default) uses ``NearestNeighbors``
    :param savepath: optional ``.npz`` or ``.mat`` file the computed matrix
        is written to; other extensions are ignored
    :param W_path: optional ``.mat`` or ``.npz`` file to load the matrix from
        instead of computing it
    :return: the affinity matrix ``W``
    :raises ValueError: if ``W_path`` has an unsupported extension
    """
    N, _ = X.shape
    if W_path is not None:
        if W_path.endswith('.mat'):
            return sio.loadmat(W_path)['W']
        if W_path.endswith('.npz'):
            return sparse.load_npz(W_path)
        # Previously this fell through to ``return W`` with W unbound.
        raise ValueError(
            "W_path must end with '.mat' or '.npz', got %r" % (W_path,))

    print('Compute Affinity ')
    start_time = timeit.default_timer()
    if alg == "flann":
        print('with Flann')
        flann = FLANN()
        # NOTE(review): pyflann appears to report squared distances while
        # the other branch yields plain ones, so the Gaussian kernel below
        # may be scaled differently per back end -- confirm.
        knnind, dist = flann.nn(X,
                                X,
                                knn,
                                algorithm="kdtree",
                                target_precision=0.9,
                                cores=5)
    else:
        nbrs = NearestNeighbors(n_neighbors=knn).fit(X)
        dist, knnind = nbrs.kneighbors(X)

    # One (row, col) entry per kept neighbour; column 0 is skipped.
    row = np.repeat(np.arange(N), knn - 1)
    col = knnind[:, 1:].flatten()
    if scale is None:
        data = np.ones(N * (knn - 1))
    else:
        neighbour_dist = dist[:, 1:]
        if scale is True:
            scale = np.median(neighbour_dist)
        data = np.exp((-neighbour_dist**2) / (2 * scale**2)).flatten()

    # ``np.float`` was removed from NumPy; the builtin is the same dtype.
    W = sparse.csc_matrix((data, (row, col)), shape=(N, N), dtype=float)
    W = (W + W.transpose(copy=True)) / 2  # symmetrise
    elapsed = timeit.default_timer() - start_time
    print(elapsed)

    if isinstance(savepath, str):
        if savepath.endswith('.npz'):
            sparse.save_npz(savepath, W)
        elif savepath.endswith('.mat'):
            sio.savemat(savepath, {'W': W})

    return W
Esempio n. 6
0
def match(desc1, desc2, dist_ratio=0.6, num_trees=4):
    """Match each row of ``desc1`` to a row of ``desc2`` with a ratio test.

    A kd-tree FLANN search (``num_trees`` trees) returns the two nearest
    ``desc2`` rows for every ``desc1`` row.  The nearest one is accepted when
    its distance is below ``dist_ratio`` times the runner-up's distance.

    Returns an integer array with one entry per ``desc1`` row holding the
    accepted ``desc2`` index; rejected rows keep the initial value 0.
    """
    searcher = FLANN()
    neighbours, distances = searcher.nn(desc2, desc1, 2,
                                        algorithm='kdtree', trees=num_trees)

    matchscores = zeros((desc1.shape[0]), 'int')
    for row, (candidates, row_dists) in enumerate(zip(neighbours, distances)):
        best_idx, _runner_up_idx = candidates
        best_dist, runner_up_dist = row_dists
        if best_dist < dist_ratio * runner_up_dist:
            matchscores[row] = best_idx
    return matchscores
Esempio n. 7
0
def nn_match(descs1, descs2):
    """
    Look up, for every descriptor of image 1, its nearest descriptor of image 2.

    The search is done with pyflann using a kd-tree index with 4 trees.

    :param descs1: descriptors from image 1, (N1, D)
    :param descs2: descriptors from image 2, (N2, D)
    :return indices: indices into the keypoints of image 2, one per row of
        ``descs1`` (presumably shape (N1,) -- verify against pyflann)
    """
    searcher = FLANN()
    nearest, _distances = searcher.nn(descs2, descs1,
                                      algorithm="kdtree", trees=4)
    return nearest
Esempio n. 8
0
class KNeighborsClassifier():
    """k-nearest-neighbours classifier that uses FLANN for neighbour search.

    :param n_neighbors: number of neighbours that vote on each prediction
    :param weights: weighting scheme forwarded to ``_get_weights``
    """

    def __init__(self, n_neighbors=5, weights='uniform'):
        # Hyper-parameters of the FLANN algorithm.
        self.algrithm_choice = "kmeans"
        self.branching = 32
        self.iterations = 7
        self.checks = 16

        # Basic KNN parameters.
        self.n_neighbors = n_neighbors
        self.weights = weights
        self.flann = FLANN()

    def fit(self, X, Y):
        """Store the training data and encode the labels as class indices.

        :param X: training samples; stored as a float32 array
        :param Y: labels, shape (n_samples,), (n_samples, 1) or
            (n_samples, n_outputs)
        """
        self.train_data = np.asarray(X).astype(np.float32)
        Y = np.asarray(Y)  # also accept plain sequences

        if Y.ndim == 1 or (Y.ndim == 2 and Y.shape[1] == 1):
            if Y.ndim != 1:
                warnings.warn("A column-vector y was passed when a 1d array "
                              "was expected. Please change the shape of y to "
                              "(n_samples, ), for example using ravel().",
                              DataConversionWarning, stacklevel=2)
            self.outputs_2d_ = False
            Y = Y.reshape((-1, 1))
        else:
            self.outputs_2d_ = True

        self.classes_ = []
        # ``np.int`` was removed from NumPy; the builtin is the same dtype.
        self.train_label = np.empty(Y.shape, dtype=int)
        for k in range(self.train_label.shape[1]):
            classes, self.train_label[:, k] = np.unique(Y[:, k], return_inverse=True)
            self.classes_.append(classes)

        if not self.outputs_2d_:
            self.classes_ = self.classes_[0]
            self.train_label = self.train_label.ravel()

    def predict(self, X, n_neighbors=None):
        """Predict the class labels for the provided data.

        Parameters
        ----------
        X : array-like, shape (n_queries, n_features)
            Test samples.
        n_neighbors : int, optional
            When given, replaces ``self.n_neighbors`` for this and all later
            calls.

        Returns
        -------
        y : array of shape [n_queries] or [n_queries, n_outputs]
            Class labels for each data sample.
        """
        if n_neighbors is not None:
            self.n_neighbors = n_neighbors

        X = check_array(X, accept_sparse='csr')
        X = X.astype(np.float32)

        neigh_dist, neigh_ind = self.kneighbors(X)

        # Work on the 2-D label layout regardless of how fit() stored it.
        classes_ = self.classes_
        _y = self.train_label
        if not self.outputs_2d_:
            _y = self.train_label.reshape((-1, 1))
            classes_ = [self.classes_]

        n_outputs = len(classes_)
        n_queries = X.shape[0]
        weights = _get_weights(neigh_dist, self.weights)

        y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)
        for k, classes_k in enumerate(classes_):
            if weights is None:
                mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
            else:
                mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)

            mode = np.asarray(mode.ravel(), dtype=np.intp)
            y_pred[:, k] = classes_k.take(mode)

        if not self.outputs_2d_:
            y_pred = y_pred.ravel()

        return y_pred

    def kneighbors(self, test_data):
        """Return ``(distances, indices)`` of the neighbours of ``test_data``.

        Both arrays always have one column per neighbour, also when only a
        single neighbour is requested.
        """
        nearest_neighbours, dists = self.flann.nn(
            self.train_data,
            test_data,
            self.n_neighbors,
            algorithm=self.algrithm_choice,
            branching=self.branching,
            iterations=self.iterations,
            checks=self.checks)
        if len(nearest_neighbours.shape) == 1:
            nearest_neighbours = nearest_neighbours.reshape((-1, 1))
            dists = dists.reshape((-1, 1))
        return dists, nearest_neighbours
Esempio n. 9
0

def stacksize(since=0.0):
    """Return the ``VmStk:`` value reported by ``_VmB`` minus ``since``.

    Pass an earlier reading as ``since`` to get the change between two
    measurements.
    """
    current = _VmB('VmStk:')
    return current - since


if __name__ == '__main__':
    # Endless leak check: run the same FLANN query over and over and print
    # the Python heap summary next to the total process memory each round.
    for banner_line in (
            'Profiling Memory usage for pyflann; CTRL-C to stop.',
            'Increasing total process memory, relative to the python memory, ',
            'implies a memory leak in the external libs.',
            'Increasing python memory implies a memory leak in the python code.',
    ):
        print(banner_line)

    h = hpy()

    while True:
        s = str(h.heap())
        # Only the first line of the heap report is shown.
        first_line_end = s.find('\n')
        print('Python: %s;    Process Total: %s' %
              (s[:first_line_end], memory()))

        X1 = rand(50000, 2)
        X2 = rand(50000, 2)
        pf = FLANN()
        nnlist = pf.nn(X1, X2)
        # Release everything from this round before measuring again.
        del X1, X2, nnlist, pf
        gc.collect()
def make_test(test_start=1000, test_end=1050):
    f1 = open('states.pkl', 'r')
    f2 = open('states_for_test.pkl', 'r')
    data_states = cPickle.load(f1)
    test_states = cPickle.load(f2)
    f1.close()
    f2.close()

    time_brute = []
    time_sk_kd = []
    time_sk_ball = []
    time_kdtree = []
    time_annoy = []
    time_flann = []
    time_brute_tot = time_sk_kd_tot = time_sk_ball_tot = time_kdtree_tot = time_annoy_tot = time_flann_tot = 0

    kdtree_tree = None
    for items in xrange(test_start, test_end):
        print "item:", items

        ground_truth = np.zeros((test_num_for_each, K), dtype=np.int32)
        time_brute_start = time.time()
        for no_test in xrange(test_num_for_each):
            distance_list = []
            current_state = test_states[items, no_test]
            for target in xrange(items):
                target_state = data_states[target]
                distance_list.append(DistanceNode(np.sum(np.absolute(current_state - target_state)**2), target))
            smallest = heapq.nsmallest(K, distance_list, key=lambda x: x.distance)
            ground_truth[no_test] = [x.index for x in smallest]
        time_brute_end = time.time()
        time_brute.append(time_brute_end - time_brute_start)
        time_brute_tot += time_brute[-1]
        # print ground_truth

        time_sk_kd_start = time.time()
        tree = KDTree(data_states[:items, :])
        dist, indices = tree.query(test_states[items], K)
        time_sk_kd_end = time.time()
        time_sk_kd.append(time_sk_kd_end - time_sk_kd_start)
        time_sk_kd_tot += time_sk_kd[-1]
        # print indices

        time_sk_ball_start = time.time()
        tree = BallTree(data_states[:items, :], 10000)
        dist, indices = tree.query(test_states[items], K)
        time_sk_ball_end = time.time()
        time_sk_ball.append(time_sk_ball_end - time_sk_ball_start)
        time_sk_ball_tot += time_sk_ball[-1]
        # print indices

        """
        annoy is absolutely disappointing for its low speed and poor accuracy.
        """
        time_annoy_start = time.time()
        annoy_result = np.zeros((test_num_for_each, K), dtype=np.int32)
        tree = AnnoyIndex(dimension_result)
        for i in xrange(items):
            tree.add_item(i, data_states[i, :])
        tree.build(10)
        for no_test in xrange(test_num_for_each):
            current_state = test_states[items, no_test]
            annoy_result[no_test] = tree.get_nns_by_vector(current_state, K)
        time_annoy_end = time.time()
        time_annoy.append(time_annoy_end - time_annoy_start)
        time_annoy_tot += time_annoy[-1]
        # print annoy_result
        # print annoy_result - indices

        """
        flann is still not very ideal
        """

        time_flann_start = time.time()
        flann = FLANN()
        result, dist = flann.nn(data_states[:items, :], test_states[items], K, algorithm='kdtree', trees=10, checks=16)
        time_flann_end = time.time()
        time_flann.append(time_flann_end - time_flann_start)
        time_flann_tot += time_flann[-1]
        # print result-indices

        """
        This kdtree module is so disappointing!!!! It is 100 times slower than Sklearn and even slower than brute force,
        more over it even makes mistakes.

        This kdtree module supports online insertion and deletion. I thought it would be much faster than Sklearn
         KdTree which rebuilds the tree every time. But the truth is the opposite.
        """

        # time_kdtree_start = time.time()
        # if kdtree_tree is None:
        #     point_list = [MyTuple(data_states[i, :], i) for i in xrange(items)]
        #     kdtree_tree = kdtree.create(point_list)
        # else:
        #     point = MyTuple(data_states[items, :], items)
        #     kdtree_tree.add(point)
        # kdtree_result = np.zeros((test_num_for_each, K), dtype=np.int32)
        # for no_test in xrange(test_num_for_each):
        #     current_state = test_states[items, no_test]
        #     smallest = kdtree_tree.search_knn(MyTuple(current_state, -1), K)
        #     kdtree_result[no_test] = [x[0].data.pos for x in smallest]
        # time_kdtree_end = time.time()
        # time_kdtree.append(time_kdtree_end - time_kdtree_start)
        # time_kdtree_tot += time_kdtree[-1]
        # print kdtree_result
        # print kdtree_result-indices

    print 'brute force:', time_brute_tot
    print 'sklearn KDTree', time_sk_kd_tot
    print 'sklearn BallTree', time_sk_ball_tot
    print 'approximate annoy', time_annoy_tot
    print 'approximate flann', time_flann_tot
    print 'kdtree (deprecated)', time_kdtree_tot
Esempio n. 11
0
# For every sample size in `ratio`, draw 50 random subsets of the atom table
# `dpos_org` and analyse the nearest-neighbour distances between its Sc atoms.
for n in ratio:
    vol = []
    com_num = []
    for i in range(50):
        # n random row positions in [0, N); randint samples with replacement.
        ind = np.random.randint(N, size=n)
        dpos = dpos_org.iloc[ind]

        #Al_pos = dpos.loc[dpos.element == 'Al', :]
        # Keep only the Sc atoms and take their coordinates as an array.
        Sc_pos = dpos.loc[dpos.element == 'Sc', :]
        data_Sc = Sc_pos.loc[:, ['x', 'y', 'z']].values

        #ind_al = np.random.randint(5000, size=len(Al_pos))
        #Al_pos =Al_pos.iloc[ind_al]

        ## Remove single atoms.
        results, dists = fl.nn(scale(data_Sc), scale(data_Sc), 8)   # 8 nearest neighbours of every point within the same (scaled) set
        cov_dists = np.asarray([np.std(d[1:]) for d in dists])        # standard deviation of the distances, first neighbour skipped (presumably the point itself)

        # Optional histogram of the per-atom spread; disabled by default.
        viz = False
        if viz is True:
            fig = plt.figure()
            mng = plt.get_current_fig_manager()
            mng.full_screen_toggle()
            ax = fig.add_subplot(111)
            ax.hist(cov_dists)
            ax.set_xlabel('Covariance of Nearest Neighbor Distance')
            ax.set_ylabel('Frequency')
            ax.xaxis.label.set_size(26)
            ax.yaxis.label.set_size(26)
            # NOTE(review): only the x ticks are resized; ytick is unused.
            for xtick, ytick in zip(ax.xaxis.get_major_ticks(), ax.yaxis.get_major_ticks()):
                            xtick.label.set_fontsize(20)
Esempio n. 12
0
class Test_PyFLANN_nn(unittest.TestCase):
    """Nearest-neighbour tests for ``FLANN.nn`` on random point sets."""

    def setUp(self):
        self.nn = FLANN()

    ##########################################################################
    # The typical

    # NOTE(review): the name says "10pt_kmeans" but the test runs the kdtree
    # algorithm on 2 points; left unchanged.
    def test_nn_2d_10pt_kmeans(self):
        self.__nd_random_test(2, 2, algorithm='kdtree')

    def test_nn_2d_1000pt_kmeans(self):
        self.__nd_random_test(2, 1000, algorithm='kmeans')

    def test_nn_100d_1000pt_kmeans(self):
        self.__nd_random_test(100, 1000, algorithm='kmeans')

    def test_nn_500d_100pt_kmeans(self):
        self.__nd_random_test(500, 100, algorithm='kmeans')

    def test_nn_2d_1000pt_kdtree(self):
        self.__nd_random_test(2, 1000, algorithm='kdtree')

    def test_nn_100d_1000pt_kdtree(self):
        self.__nd_random_test(100, 1000, algorithm='kdtree')

    def test_nn_500d_100pt_kdtree(self):
        self.__nd_random_test(500, 100, algorithm='kdtree')

    def test_nn_2d_1000pt_linear(self):
        self.__nd_random_test(2, 1000, algorithm='linear')

    def test_nn_100d_50pt_linear(self):
        self.__nd_random_test(100, 50, algorithm='linear')

    def test_nn_2d_1000pt_composite(self):
        self.__nd_random_test(2, 1000, algorithm='composite')

    def test_nn_100d_1000pt_composite(self):
        self.__nd_random_test(100, 1000, algorithm='composite')

    def test_nn_500d_100pt_composite(self):
        self.__nd_random_test(500, 100, algorithm='composite')

    def test_nn_multtrees_2d_1000pt_kmeans(self):
        self.__nd_random_test(2, 1000, algorithm='kmeans', trees=8)

    def test_nn_multtrees_100d_1000pt_kmeans(self):
        self.__nd_random_test(100, 1000, algorithm='kmeans', trees=8)

    def test_nn_multtrees_500d_100pt_kmeans(self):
        self.__nd_random_test(500, 100, algorithm='kmeans', trees=8)

    ##########################################################################
    # Stress it should handle

    def test_nn_stress_1d_1pt_kmeans(self):
        self.__nd_random_test(1, 1, algorithm='kmeans')

    def test_nn_stress_1d_1pt_linear(self):
        self.__nd_random_test(1, 1, algorithm='linear')

    def test_nn_stress_1d_1pt_kdtree(self):
        self.__nd_random_test(1, 1, algorithm='kdtree')

    def test_nn_stress_1d_1pt_composite(self):
        self.__nd_random_test(1, 1, algorithm='composite')

    def __mean_recall(self, neighbor_idx, N, num_neighbors):
        """Average fraction of each point's own copies among its neighbours.

        Copy ``c`` of point ``i`` sits at row ``i + c * N`` of the stacked
        data set, so those are the indices expected in ``neighbor_idx[i]``.
        """
        total = 0.0
        for i in range(N):
            expected = [i + n * N for n in range(num_neighbors)]
            found = set(neighbor_idx[i]).intersection(expected)
            total += float(len(found)) / num_neighbors
        return total / N

    def __nd_random_test(self,
                         dim,
                         N,
                         type=np.float32,
                         num_neighbors=10,
                         **kwargs):
        """
        Make a set of random points, then pass the same ones to the
        query points.  Each point should be closest to itself.

        Afterwards the data set is stacked ``num_neighbors`` times (first
        exactly, then with small noise) and every point must find its own
        copies as nearest neighbours in at least 99% of the cases.
        ``kwargs`` are forwarded to ``FLANN.nn``.
        """
        np.random.seed(0)
        x = np.array(np.random.rand(N, dim), dtype=type)
        perm = np.random.permutation(N)

        idx, dists = self.nn.nn(x, x[perm], **kwargs)
        self.assertTrue(all(idx == perm))

        # Make sure it's okay if we do make all the points equal
        x_mult_nn = np.concatenate([x for i in range(num_neighbors)])
        nidx, ndists = self.nn.nn(x_mult_nn,
                                  x,
                                  num_neighbors=num_neighbors,
                                  **kwargs)

        correctness = self.__mean_recall(nidx, N, num_neighbors)
        self.assertTrue(
            correctness >= 0.99,
            'failed #1: N=%d,correctness=%f' % (N, correctness),
        )

        # now what happens if they are slightly off
        x_mult_nn += (np.random.randn(x_mult_nn.shape[0], x_mult_nn.shape[1]) *
                      0.0001 / dim)
        n2idx, n2dists = self.nn.nn(x_mult_nn,
                                    x,
                                    num_neighbors=num_neighbors,
                                    **kwargs)

        # Scored from scratch: the old code kept adding to the first total,
        # so this second check could never fail.
        correctness = self.__mean_recall(n2idx, N, num_neighbors)
        self.assertTrue(
            correctness >= 0.99,
            'failed #2: N=%d,correctness=%f' % (N, correctness),
        )
Esempio n. 13
0
# Compute pairwise chamfer distances between the feature maps stored as .npy
# files in `src` and write them to `<src>/dists.json`.
files = os.listdir(src)
filenames = []
vectors = []

for f in files:
    # need to convert into list of vectors
    if f.endswith('.npy'):
        # Drops the last 8 characters of the file name ('.npy' plus a
        # 4-character suffix) -- presumably a fixed naming scheme; confirm.
        filenames.append(f[:-8])
        data = np.load(os.path.join(src, f))
        # Each file is reshaped to 28 * 28 vectors of 512 values.
        vectors.append(np.reshape(data, (28 * 28, 512)))

flann = FLANN()
distMap = {}
# For any other `mode` nothing is computed and an empty map is written.
if mode == 'chamfer':
    for i in range(0, len(filenames)):
        print('Computing chamfer for file ' + str(i))
        imgDists = {}
        dataset = vectors[i]
        for j in range(0, len(filenames)):
            testset = vectors[j]
            # Nearest-neighbour distances in both directions, summed up.
            _, dists = flann.nn(dataset, testset, 1)
            _, rdists = flann.nn(testset, dataset, 1)
            # float() replaces np.asscalar, which was removed from NumPy;
            # json needs a plain Python number either way.
            imgDists[filenames[j]] = float(np.sum(dists) + np.sum(rdists))

        distMap[filenames[i]] = imgDists

with open(os.path.join(src, 'dists.json'), 'w') as outfile:
    json.dump(distMap, outfile, sort_keys=True, indent=2)
Esempio n. 14
0
class Test_PyFLANN_nn(unittest.TestCase):
    """Checks the ``autotuned`` FLANN index against exact linear search."""

    def setUp(self):
        self.nn = FLANN(log_level='warning')

    ##########################################################################
    # The typical

    def test_nn_2d_2pt(self):
        self.__nd_random_test_autotune(2, 2)

    def test_nn_autotune_2d_10pt(self):
        self.__nd_random_test_autotune(2, 10)

    # def test_nn_autotune_100d_1000pt(self):
    #     self.__nd_random_test_autotune(100, 1000)

    # def test_nn_autotune_500d_100pt(self):
    #     self.__nd_random_test_autotune(500, 100)

    #
    #    ####################################################################
    #    # Stress it should handle
    #
    def test_nn_stress_1d_1pt_kmeans_autotune(self):
        self.__nd_random_test_autotune(1, 1)

    def __ensure_list(self, arg):
        """Return ``arg`` as a flat list of hashable items.

        Lists are returned unchanged.  Scalars become one-element lists and
        NumPy rows become lists of their elements, so the result can always
        be fed to ``set()`` (wrapping a row as ``[row]`` cannot, because
        arrays are unhashable).
        """
        if isinstance(arg, list):
            return arg
        return np.atleast_1d(arg).tolist()

    def __nd_random_test_autotune(self, dim, N, num_neighbors=1, **kwargs):
        """
        Make two sets of random points, index one and query with the other.
        The autotuned index must agree with exact linear search on at least
        90% of the requested target precision.

        ``kwargs`` are forwarded to the autotuned ``FLANN.nn`` call.
        """
        np.random.seed(0)
        x = np.random.rand(N, dim)
        xq = np.random.rand(N, dim)

        # compute ground truth nearest neighbors
        gt_idx, gt_dist = self.nn.nn(x,
                                     xq,
                                     algorithm='linear',
                                     num_neighbors=num_neighbors)

        for tp in [0.70, 0.80, 0.90]:
            nidx, ndist = self.nn.nn(x,
                                     xq,
                                     algorithm='autotuned',
                                     sample_fraction=1.0,
                                     num_neighbors=num_neighbors,
                                     target_precision=tp,
                                     checks=-2,
                                     **kwargs)

            # Fraction of ground-truth neighbours recovered, averaged over
            # all queries.
            correctness = 0.0
            for i in range(N):
                l1 = self.__ensure_list(nidx[i])
                l2 = self.__ensure_list(gt_idx[i])
                correctness += float(len(
                    set(l1).intersection(l2))) / num_neighbors
            correctness /= N
            self.assertTrue(
                correctness >= tp * 0.9,
                'failed #1: targ_prec=%f, N=%d,correctness=%f' %
                (tp, N, correctness),
            )
Esempio n. 15
0
from pyflann import FLANN
import numpy as np

# Search settings shared by both demo queries below.
_KMEANS_OPTIONS = dict(algorithm="kmeans", branching=32, iterations=7, checks=16)

# Reference points that get indexed.
dataset = np.array([
    [1., 1, 1, 2, 3],
    [10, 10, 10, 3, 2],
    [100, 100, 2, 30, 1],
])
# Query points whose neighbours are looked up.
testset = np.array([
    [1., 1, 1, 1, 1],
    [90, 90, 10, 10, 1],
])
flann = FLANN()
# For each query point: the 2 closest reference points (2 is the neighbour
# count passed in) and the matching distances.
result, dists = flann.nn(dataset, testset, 2, **_KMEANS_OPTIONS)
print(result)
print(dists)

print("-----")
# Same query on random data: 10 000 reference points and 1 000 query points,
# 128 dimensions each, 5 neighbours per query.
dataset = np.random.rand(10000, 128)
testset = np.random.rand(1000, 128)
flann = FLANN()
result, dists = flann.nn(dataset, testset, 5, **_KMEANS_OPTIONS)
print(result)
print(dists)
print(np.shape(dists))
def make_test(test_start=1000, test_end=1050):
    f1 = open('states.pkl', 'r')
    f2 = open('states_for_test.pkl', 'r')
    data_states = cPickle.load(f1)
    test_states = cPickle.load(f2)
    f1.close()
    f2.close()

    time_brute = []
    time_sk_kd = []
    time_sk_ball = []
    time_kdtree = []
    time_annoy = []
    time_flann = []
    time_brute_tot = time_sk_kd_tot = time_sk_ball_tot = time_kdtree_tot = time_annoy_tot = time_flann_tot = 0

    kdtree_tree = None
    for items in xrange(test_start, test_end):
        print "item:", items

        ground_truth = np.zeros((test_num_for_each, K), dtype=np.int32)
        time_brute_start = time.time()
        for no_test in xrange(test_num_for_each):
            distance_list = []
            current_state = test_states[items, no_test]
            for target in xrange(items):
                target_state = data_states[target]
                distance_list.append(
                    DistanceNode(
                        np.sum(np.absolute(current_state - target_state)**2),
                        target))
            smallest = heapq.nsmallest(K,
                                       distance_list,
                                       key=lambda x: x.distance)
            ground_truth[no_test] = [x.index for x in smallest]
        time_brute_end = time.time()
        time_brute.append(time_brute_end - time_brute_start)
        time_brute_tot += time_brute[-1]
        # print ground_truth

        time_sk_kd_start = time.time()
        tree = KDTree(data_states[:items, :])
        dist, indices = tree.query(test_states[items], K)
        time_sk_kd_end = time.time()
        time_sk_kd.append(time_sk_kd_end - time_sk_kd_start)
        time_sk_kd_tot += time_sk_kd[-1]
        # print indices

        time_sk_ball_start = time.time()
        tree = BallTree(data_states[:items, :], 10000)
        dist, indices = tree.query(test_states[items], K)
        time_sk_ball_end = time.time()
        time_sk_ball.append(time_sk_ball_end - time_sk_ball_start)
        time_sk_ball_tot += time_sk_ball[-1]
        # print indices
        """
        annoy is absolutely disappointing for its low speed and poor accuracy.
        """
        time_annoy_start = time.time()
        annoy_result = np.zeros((test_num_for_each, K), dtype=np.int32)
        tree = AnnoyIndex(dimension_result)
        for i in xrange(items):
            tree.add_item(i, data_states[i, :])
        tree.build(10)
        for no_test in xrange(test_num_for_each):
            current_state = test_states[items, no_test]
            annoy_result[no_test] = tree.get_nns_by_vector(current_state, K)
        time_annoy_end = time.time()
        time_annoy.append(time_annoy_end - time_annoy_start)
        time_annoy_tot += time_annoy[-1]
        # print annoy_result
        # print annoy_result - indices
        """
        flann is still not very ideal
        """

        time_flann_start = time.time()
        flann = FLANN()
        result, dist = flann.nn(data_states[:items, :],
                                test_states[items],
                                K,
                                algorithm='kdtree',
                                trees=10,
                                checks=16)
        time_flann_end = time.time()
        time_flann.append(time_flann_end - time_flann_start)
        time_flann_tot += time_flann[-1]
        # print result-indices
        """
        This kdtree module is so disappointing!!!! It is 100 times slower than Sklearn and even slower than brute force,
        more over it even makes mistakes.

        This kdtree module supports online insertion and deletion. I thought it would be much faster than Sklearn
         KdTree which rebuilds the tree every time. But the truth is the opposite.
        """

        # time_kdtree_start = time.time()
        # if kdtree_tree is None:
        #     point_list = [MyTuple(data_states[i, :], i) for i in xrange(items)]
        #     kdtree_tree = kdtree.create(point_list)
        # else:
        #     point = MyTuple(data_states[items, :], items)
        #     kdtree_tree.add(point)
        # kdtree_result = np.zeros((test_num_for_each, K), dtype=np.int32)
        # for no_test in xrange(test_num_for_each):
        #     current_state = test_states[items, no_test]
        #     smallest = kdtree_tree.search_knn(MyTuple(current_state, -1), K)
        #     kdtree_result[no_test] = [x[0].data.pos for x in smallest]
        # time_kdtree_end = time.time()
        # time_kdtree.append(time_kdtree_end - time_kdtree_start)
        # time_kdtree_tot += time_kdtree[-1]
        # print kdtree_result
        # print kdtree_result-indices

    print 'brute force:', time_brute_tot
    print 'sklearn KDTree', time_sk_kd_tot
    print 'sklearn BallTree', time_sk_ball_tot
    print 'approximate annoy', time_annoy_tot
    print 'approximate flann', time_flann_tot
    print 'kdtree (deprecated)', time_kdtree_tot