def fit(self, X):
    """Create a puffinn index sized for X and insert every row of X.

    The index is constructed from ``self.metric``, the column count
    ``X.shape[1]`` and ``self.space``, together with the configured hash
    function and hash source. ``hash_args`` is only forwarded when
    ``self.hash_args`` is truthy.

    Parameters
    ----------
    X
        Data to index. Must expose ``shape`` and yield rows that provide
        ``tolist()`` (presumably a 2-D NumPy array -- confirm with callers).
    """
    # Forward hash_args only when it has been configured.
    optional = {"hash_args": self.hash_args} if self.hash_args else {}
    self.index = puffinn.Index(
        self.metric,
        X.shape[1],
        self.space,
        hash_function=self.hash_function,
        hash_source=self.hash_source,
        **optional,
    )

    # Every row is handed to the index as a plain Python list.
    for row in X:
        self.index.insert(row.tolist())

    # NOTE(review): the meaning of the argument 10 is defined by puffinn's
    # rebuild(); it is passed through unchanged.
    self.index.rebuild(10)
def fit(self, X):
    """Create a puffinn index sized from the first item of X and fill it.

    The dimension passed to the index is ``len(X[0])``. ``hash_args`` is
    only forwarded when ``self.hash_args`` is truthy. Items are converted
    with ``tolist()`` before insertion only when the metric is
    ``'angular'``; for any other metric they are inserted unchanged.

    Parameters
    ----------
    X
        Non-empty, indexable collection of items to insert.
    """
    # Forward hash_args only when it has been configured.
    optional = {"hash_args": self.hash_args} if self.hash_args else {}
    self.index = puffinn.Index(
        self.metric,
        len(X[0]),
        self.space,
        hash_function=self.hash_function,
        hash_source=self.hash_source,
        **optional,
    )

    # Only angular data is turned into plain lists before insertion.
    convert_to_list = self.metric == 'angular'
    for item in X:
        self.index.insert(item.tolist() if convert_to_list else item)

    # NOTE(review): the meaning of the argument 10 is defined by puffinn's
    # rebuild(); it is passed through unchanged.
    self.index.rebuild(10)
def fit(self, X):
    """Create a puffinn index, deriving its dimension from the metric.

    For the ``'angular'`` metric the dimension is ``len(X[0])``. For any
    other metric it is one more than the largest value found in any item
    of X, with a floor of 0 (presumably the items are collections of
    integer ids -- confirm with callers). ``hash_args`` is only forwarded
    when ``self.hash_args`` is truthy.

    Parameters
    ----------
    X
        Iterable of items; each item must provide ``tolist()``.
    """
    if self.metric == 'angular':
        dimensions = len(X[0])
    else:
        # Largest value + 1 over all items; the leading 0 is the starting
        # value and wins ties, as in a running max seeded with 0.
        dimensions = max([0, *(max(members) + 1 for members in X)])

    # Forward hash_args only when it has been configured.
    optional = {"hash_args": self.hash_args} if self.hash_args else {}
    self.index = puffinn.Index(
        self.metric,
        dimensions,
        self.space,
        hash_function=self.hash_function,
        hash_source=self.hash_source,
        **optional,
    )

    # Every item is handed to the index as a plain Python list.
    for item in X:
        self.index.insert(item.tolist())
    self.index.rebuild()
def fit(self, X, y=None) -> PuffinnLSH:
    """ Build the puffinn LSH index and insert data from X.

    Parameters
    ----------
    X: np.array
        Data to be indexed. Validated with ``check_array`` (or with
        ``check_X_y`` when ``y`` is given).
    y: any
        Optional targets. Not used for indexing; when provided they are
        validated together with ``X`` and kept in ``y_train_``.

    Returns
    -------
    self: PuffinnLSH
        This instance, with ``index_`` holding the built index and
        ``X_train_`` holding the validated data.
    """
    if y is not None:
        X, y = check_X_y(X, y)
    else:
        X = check_array(X)
    self.y_train_ = y

    # Unknown metrics are replaced (with a warning) rather than rejected.
    if self.metric not in self.valid_metrics:
        warnings.warn(
            f'Invalid metric "{self.metric}". Using "euclidean" instead')
        self.metric = 'euclidean'

    # Translate the metric name via metric_map; names without a mapping
    # are used as they are.
    try:
        resolved_metric = self.metric_map[self.metric]
    except KeyError:
        resolved_metric = self.metric
    self.effective_metric = resolved_metric

    # Reduce default memory consumption for unit tests
    if "pytest" in sys.modules:
        self.memory = 3 * 1024**2

    # Construct the index
    index = puffinn.Index(self.effective_metric, X.shape[1], self.memory)

    # Wrap the rows in a progress bar only in verbose mode.
    rows = tqdm(X, desc='Indexing', total=len(X)) if self.verbose else X
    for row in rows:
        index.insert(row.tolist())
    index.rebuild(num_threads=self.n_jobs)

    self.index_ = index
    # remove, once we can retrieve vectors from the index itself
    self.X_train_ = X

    return self
def fit(self, X, y=None):
    """ Build the puffinn LSH index and insert data from X.

    Parameters
    ----------
    X: np.array
        Data to be indexed. Validated with ``check_array`` (or with
        ``check_X_y`` when ``y`` is given).
    y: any
        Optional targets. Not used for indexing; when provided they are
        validated together with ``X`` and kept in ``y_train_``.

    Returns
    -------
    self
        This instance, with ``index_``, ``n_indexed_`` and
        ``X_indexed_norm_`` set.
    """
    if y is not None:
        X, y = check_X_y(X, y)
    else:
        X = check_array(X)
    self.y_train_ = y

    # Unknown metrics are replaced (with a warning) rather than rejected.
    if self.metric not in self.valid_metrics:
        warnings.warn(
            f'Invalid metric "{self.metric}". Using "euclidean" instead')
        self.metric = 'euclidean'

    # Translate the metric name via metric_map; names without a mapping
    # are used as they are.
    try:
        resolved_metric = self.metric_map[self.metric]
    except KeyError:
        resolved_metric = self.metric
    self._effective_metric = resolved_metric

    # Larger memory means many iterations (time-recall trade-off).
    # Heuristic: number of entries in X times 8 times 500, but never less
    # than 1024**2; a user-supplied self.memory can only raise this value.
    memory_limit = max(np.multiply(*X.shape) * 8 * 500, 1024**2)
    if self.memory is not None:
        memory_limit = max(self.memory, memory_limit)

    # Construct the index
    index = puffinn.Index(self._effective_metric, X.shape[1], memory_limit)

    # The progress bar is shown only in verbose mode.
    hide_progress = not self.verbose
    for row in tqdm(X, desc='Indexing', disable=hide_progress):
        index.insert(row.tolist())
    index.rebuild()

    self.index_ = index
    self.n_indexed_ = X.shape[0]

    # Per-row L2 norms of the indexed data, stored as a column vector.
    row_norms = np.linalg.norm(X, ord=2, axis=1)
    self.X_indexed_norm_ = row_norms.reshape(-1, 1)

    return self
def create_index():
    """Construct a puffinn index with fixed arguments and print it.

    The index is created with ``'angular'``, ``10`` and ``1024**2`` as its
    constructor arguments and is discarded after printing.

    Returns
    -------
    bool
        Always ``True`` once the index has been constructed and printed.
    """
    print(puffinn.Index('angular', 10, 1024**2))
    return True