def fit(self, X):
    """Build the Dolphinn index over the data set ``X``.

    The points are converted to ``float32``, a mean is computed with
    ``findmean`` and stored in ``self.m``, the data is re-centred with
    ``isotropize`` using that mean, and the resulting points are handed to
    ``Dolphinn``, whose instance is stored in ``self._index``.

    Args:
        X: 2-D array-like of shape ``(n_points, n_dims)``. Lists of lists
            are accepted as well as numpy arrays.

    Raises:
        ValueError: if ``X`` is not 2-D or contains no points.
    """
    # numpy.array always returns a fresh float32 copy (as ``astype`` did),
    # so the caller's data is never touched, and it also accepts plain
    # Python sequences, which ``X.astype`` did not.
    X = numpy.array(X, dtype=numpy.float32)
    if X.ndim != 2 or X.shape[0] == 0:
        raise ValueError(
            "X must be a non-empty 2-D array of shape (n_points, n_dims)"
        )

    d = X.shape[1]
    # NOTE(review): the literal 10 is presumably the sample size used by
    # findmean -- confirm against its definition.
    self.m = findmean(X, d, 10)
    X = isotropize(X, d, self.m)

    # Hypercube dimension is derived from the number of points: log2(n) - 2.
    hypercube_dim = int(numpy.log2(len(X))) - 2
    self._index = Dolphinn(X, d, hypercube_dim)
def fit(self, X):
    """Build the Dolphinn index over the data set ``X``.

    The points are coerced to ``float32`` (without copying when they
    already are a ``float32`` array), a mean is computed with ``findmean``
    and stored in ``self.m``, the data is re-centred with ``isotropize``
    using that mean, and the resulting points are handed to ``Dolphinn``,
    whose instance is stored in ``self._index``.

    Args:
        X: 2-D array-like of shape ``(n_points, n_dims)``. Lists of lists
            are accepted as well as numpy arrays.

    Raises:
        ValueError: if ``X`` is not 2-D or contains no points.
    """
    # asarray converts only when needed: a float32 ndarray is passed
    # through untouched, anything else (other dtypes, plain sequences) is
    # converted. The previous ``X.dtype`` check failed on non-array input.
    X = numpy.asarray(X, dtype=numpy.float32)
    if X.ndim != 2 or X.shape[0] == 0:
        raise ValueError(
            "X must be a non-empty 2-D array of shape (n_points, n_dims)"
        )

    d = X.shape[1]
    # NOTE(review): the literal 10 is presumably the sample size used by
    # findmean -- confirm against its definition.
    self.m = findmean(X, d, 10)
    X = isotropize(X, d, self.m)

    # Hypercube dimension is derived from the number of points: log2(n) - 2.
    hypercube_dim = int(numpy.log2(len(X))) - 2
    self._index = Dolphinn(X, d, hypercube_dim)
# Driver script: reads the siftsmall base and query sets, re-centres both
# around a sampled mean, builds a Dolphinn index and reports the build time.
# NOTE(review): this section uses Python 2 print statements and
# time.clock(); time.clock() was removed in Python 3.8, so the script
# cannot run unchanged on a current interpreter.
# NOTE(review): `fr`, `np` and `time` are not imported explicitly here;
# presumably they are made available by the star import -- confirm.
from dolphinn import *

# Settings the original author flagged with "####" markers.
# NOTE(review): neither value is used in the visible part of the script;
# presumably they are consumed by the query phase further down -- confirm.
num_of_probes = 20
M = 1

# READ FILES
# D1: data dimension, P: data set
# D2: query dimension, Q: query set
(D1, P) = fr.fvecs_read("siftsmall/siftsmall_base.fvecs")
(D2, Q) = fr.fvecs_read("siftsmall/siftsmall_query.fvecs")
# Data and queries must have the same dimension.
if D1 != D2:
    raise IOError("Data points and query points are of different dimension")
D = D1

# CHANGE OF ORIGIN
# Find the mean of randomly sampled points.
# NOTE(review): the literal 10 is presumably the sample size -- confirm.
m = fr.findmean(P, D, 10)
# Then treat this mean as the origin, for the data and the queries alike.
P = fr.isotropize(P, D, m)
Q = fr.isotropize(Q, D, m)

# New (hypercube) dimension, derived from the data set size: log2(n) - 2.
# Also flagged with a "####" marker by the original author.
K = int(np.log2(len(P))) - 2
print "New dimension K=", K

# PREPROCESSING
# Time the construction of the index.
tic = time.clock()
dol = Dolphinn(P, D, K)
toc = time.clock()
print "Preprocessing time: ", toc - tic

# QUERIES
# Start the timer for the query phase; the matching stop is below this
# section of the script.
tic = time.clock()
# Assign keys to queries.