Example #1
0
    def fit(self, X):
        """Build an autotuned MRPT index over ``X``.

        ``X`` is cast to float32 when it has another dtype and, for the
        'angular' metric, its rows are L2-normalized before indexing.
        The index is stored in ``self._index_autotuned``.
        """
        needs_cast = X.dtype != numpy.float32
        if needs_cast:
            X = X.astype(numpy.float32)

        if self._metric == 'angular':
            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')

        index = mrpt.MRPTIndex(X)
        self._index_autotuned = index
        # No recall target is fixed at build time (target_recall=None);
        # the neighbor count comes from self._k.
        index.build_autotune_sample(target_recall=None,
                                    k=self._k,
                                    n_test=1000)
Example #2
0
    def fit(self, X):
        """Build an MRPT index over ``X`` and keep it in ``self._index``.

        For the 'angular' metric the rows of ``X`` are L2-normalized
        first. Tree depth and tree count come from ``self._depth`` and
        ``self._n_trees``.
        """
        use_angular = self._metric == 'angular'
        if use_angular:
            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')

        tree_params = {'depth': self._depth, 'n_trees': self._n_trees}
        self._index = mrpt.MRPTIndex(X, **tree_params)
        self._index.build()
Example #3
0
        def RunAnnMrpt():
            """Time an MRPT index build plus approximate k-NN queries.

            Options are read (and removed) from the enclosing ``options``
            mapping:

            * ``k`` (required): number of neighbors, 1 <= k <= number of
              reference rows.
            * ``num_trees`` (required): number of trees to build.
            * ``depth`` (optional): tree depth, defaults to 2.
            * ``votes_required`` (optional): forwarded to ``index.ann``.

            Returns:
                The elapsed time reported by the timer, or -1 when a
                required option is missing, ``k`` is out of range, or the
                build/query step raises.

            Raises:
                Exception: if unknown entries remain in ``options``.
            """
            totalTimer = Timer()

            # Load input dataset.
            Log.Info("Loading dataset", self.verbose)
            referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
            queryData = np.genfromtxt(self.dataset[1], delimiter=',')
            train, _ = SplitTrainData(self.dataset)

            # Get all the parameters.
            if "k" not in options:
                Log.Fatal(
                    "Required option: Number of furthest neighbors to find.")
                return -1

            k = int(options.pop("k"))
            if k < 1 or k > referenceData.shape[0]:
                # k is already an int here; it has no .group(), so format
                # it with str() instead of raising AttributeError.
                Log.Fatal("Invalid k: " + str(k) +
                          "; must be greater than 0" +
                          " and less or equal than " +
                          str(referenceData.shape[0]))
                return -1

            build_dict = {}
            run_dict = {}
            if "num_trees" not in options:
                Log.Fatal("Required option: Number of trees to build")
                return -1
            build_dict["n_trees"] = int(options.pop("num_trees"))

            # Depth falls back to a small default when not supplied.
            build_dict["depth"] = int(options.pop("depth", 2))
            if "votes_required" in options:
                run_dict["votes_required"] = int(options.pop("votes_required"))

            if len(options) > 0:
                Log.Fatal("Unknown parameters: " + str(options))
                raise Exception("unknown parameters")

            with totalTimer:
                try:
                    # Perform Approximate Nearest-Neighbors.
                    index = mrpt.MRPTIndex(np.float32(train), **build_dict)
                    index.build()
                    approximate_neighbors = np.zeros((len(queryData), k))
                    for i, query_point in enumerate(queryData):
                        approximate_neighbors[i] = index.ann(
                            np.float32(query_point), k, **run_dict)
                except Exception as e:
                    # Report the failure instead of swallowing it silently;
                    # the -1 return value is kept for callers.
                    Log.Info("Exception: " + str(e), self.verbose)
                    return -1

            return totalTimer.ElapsedTime()
Example #4
0
    def metric(self):
        """Time an MRPT index build followed by k-NN queries.

        The index is created from ``self.data[0]`` and built with
        ``self.build_dict``; each entry of ``self.data[1]`` is then
        queried for ``self.k`` neighbors using ``self.run_dict``.

        Returns:
            A dict with a single "runtime" entry holding the timer's
            elapsed time.
        """
        stopwatch = Timer()
        with stopwatch:
            reference = np.float32(self.data[0])
            index = mrpt.MRPTIndex(reference)
            index.build(**self.build_dict)

            n_queries = len(self.data[1])
            neighbors = np.zeros((n_queries, self.k))
            for row in range(n_queries):
                query_point = np.float32(self.data[1][row])
                neighbors[row] = index.ann(query_point, self.k,
                                           **self.run_dict)

        return {"runtime": stopwatch.ElapsedTime()}
Example #5
0
        def RunAnnMrpt(q):
            """Time an MRPT index build plus approximate k-NN queries.

            Parameters are parsed from the enclosing ``options`` string:

            * ``-k N`` (required): number of neighbors, 1 <= N <= number
              of reference rows.
            * ``-n N`` (required): number of trees.
            * ``-d N`` (optional): tree depth, defaults to 5.
            * ``-v N`` (optional): votes required, defaults to 4.

            Args:
                q: queue-like object; the result is also delivered via
                    ``q.put``.

            Returns:
                The elapsed time reported by the timer, or -1 on a missing
                or invalid option or when the build/query step raises. The
                same value is put on ``q``.
            """
            totalTimer = Timer()

            # Load input dataset.
            Log.Info("Loading dataset", self.verbose)
            referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
            queryData = np.genfromtxt(self.dataset[1], delimiter=',')
            train, _ = SplitTrainData(self.dataset)

            # Get all the parameters (raw strings keep "\d" a regex escape).
            k_match = re.search(r"-k (\d+)", options)
            n_match = re.search(r"-n (\d+)", options)  # Number of trees.
            d_match = re.search(r"-d (\d+)", options)  # The tree depth.
            v_match = re.search(r"-v (\d+)", options)  # Number of votes_required.

            if not k_match:
                Log.Fatal(
                    "Required option: Number of furthest neighbors to find.")
                q.put(-1)
                return -1

            k = int(k_match.group(1))
            if k < 1 or k > referenceData.shape[0]:
                # Format the parsed int directly; calling .group() on it
                # would raise AttributeError and hide the real error.
                Log.Fatal("Invalid k: " + str(k) +
                          "; must be greater than 0" +
                          " and less or equal than " +
                          str(referenceData.shape[0]))
                q.put(-1)
                return -1

            if not n_match:
                Log.Fatal("Required option: Number of trees to build")
                q.put(-1)
                return -1
            n = int(n_match.group(1))

            d = int(d_match.group(1)) if d_match else 5
            v = int(v_match.group(1)) if v_match else 4

            with totalTimer:
                try:
                    # Perform Approximate Nearest-Neighbors.
                    index = mrpt.MRPTIndex(np.float32(train),
                                           depth=d,
                                           n_trees=n)
                    index.build()
                    approximate_neighbors = np.zeros((len(queryData), k))
                    for i, query_point in enumerate(queryData):
                        approximate_neighbors[i] = index.ann(
                            np.float32(query_point), k, votes_required=v)
                except Exception as e:
                    Log.Info(e)
                    q.put(-1)
                    return -1

            # Named "elapsed" so the local does not shadow the time module.
            elapsed = totalTimer.ElapsedTime()
            q.put(elapsed)
            return elapsed
Example #6
0
        # Append this Annoy run's recall, label, construction time, search
        # time and average distance to their respective result lists.
        # NOTE(review): "reacll" and "construciotnTimes" look like misspelled
        # names defined outside this fragment — rename at the definition site.
        reacll.append(annoyRecall)
        algorithm.append('Annoy-trees-' + str(numTrees))
        construciotnTimes.append(constructionTime)
        searchTimes.append(searchTime)
        avgdistances.append(avgDist)

#mrpt multi RP-tree
import mrpt

# Benchmark MRPT once per (target recall, m) configuration: time the
# autotuned index build, then time one query per row of `query`.
for a in [(0.5, 5), (0.6, 6), (0.8, 8), (0.9, 10)]:
    m = a[1]
    target_recall = a[0]

    startTime = time.perf_counter()
    index = mrpt.MRPTIndex(train.astype(np.float32))
    # Use this iteration's recall target; a fixed 0.65 here made every
    # iteration build the same index.
    index.build_autotune_sample(target_recall, k, trees_max=10)
    end_time = time.perf_counter()
    constructionTime = end_time - startTime

    mrtpquery = query.astype(np.float32)

    rez = []
    dist = []
    startTime = time.perf_counter()

    # No k is passed to ann(): the autotuned index was built for k above.
    for q in mrtpquery:
        res, d = index.ann(q, return_distances=True)
        rez.append(res)
        dist.append(d)
    end_time = time.perf_counter()
Example #7
0
# Generate synthetic test data: low-rank (rank 5) matrices with 100 columns.
k = 10
n_queries = 100
n_points = 100000  # must be an int: np.random.rand rejects float dimensions such as 1e5
data = np.dot(np.random.rand(n_points, 5), np.random.rand(5, 100)).astype('float32')
queries = np.dot(np.random.rand(n_queries, 5), np.random.rand(5, 100)).astype('float32')

# Solve exact nearest neighbors with standard methods from scipy and numpy for reference
exact_search_time = time()
exact_neighbors = np.zeros((n_queries, k))
for i in range(n_queries):
    # Sort the distances from query i to every data point, keep the k closest.
    exact_neighbors[i] = np.argsort(cdist([queries[i]], data))[0, :k]
exact_search_time = time() - exact_search_time

# Offline phase: Indexing the data. This might take some time.
indexing_time = time()
index = mrpt.MRPTIndex(data, depth=5, n_trees=100)
index.build()
indexing_time = time() - indexing_time

# Online phase: Finding nearest neighbors stupendously fast.
approximate_search_time = time()
approximate_neighbors = np.zeros((n_queries, k))
for i in range(n_queries):
    approximate_neighbors[i] = index.ann(queries[i], k, votes_required=4)
approximate_search_time = time() - approximate_search_time

# Print some stats
print('Indexing time: %1.3f seconds' % indexing_time)
print('%d approximate queries time: %1.3f seconds' % (n_queries, approximate_search_time))
print('%d exact queries time: %1.3f seconds' % (n_queries, exact_search_time))
Example #8
0
 def fit(self, X):
     """Create an MRPT index over ``X`` and build it.

     Tree depth and tree count come from ``self._depth`` and
     ``self._n_trees``; the index is stored in ``self._index``.
     """
     index = mrpt.MRPTIndex(X, depth=self._depth, n_trees=self._n_trees)
     self._index = index
     index.build()