def _balltree(*, train, test, x_predict=None, metrics, X, leaf_size=40, metric='minkowski', **kwargs):
    """
    Build a ``BallTree`` model, score it on ``test`` and optionally predict ``x_predict``.

    For more info visit :
    https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.BallTree.html#sklearn.neighbors.BallTree

    Parameters
    ----------
    train
        Indexable pair; ``train[0]`` and ``train[1]`` are handed to ``model.fit``.
    test
        Indexable pair; ``test[0]`` is handed to ``model.predict`` and the result
        is compared against ``test[1]``.
    x_predict
        Optional extra input to predict on. When ``None`` no extra prediction is made.
    metrics
        Name of the score to compute: ``'accuracy'``, ``'f1'`` or ``'jaccard'``.
    X
        First positional argument of the ``BallTree`` constructor.
    leaf_size, metric, **kwargs
        Forwarded unchanged to the ``BallTree`` constructor.

    Returns
    -------
    tuple
        ``(model_name, score, y_predict)`` where ``y_predict`` is ``None`` when
        ``x_predict`` is ``None``.

    Raises
    ------
    ValueError
        If ``metrics`` is not one of the supported names.
    """
    supported_metrics = ('accuracy', 'f1', 'jaccard')
    # Fail fast: previously an unknown name left the score unbound and the call
    # died with UnboundLocalError only after the model had been fit and scored.
    if metrics not in supported_metrics:
        raise ValueError(
            f'Unsupported metrics value: {metrics!r}; expected one of {supported_metrics}'
        )

    # NOTE(review): scikit-learn's BallTree documents query-style methods rather
    # than fit/predict -- confirm the BallTree in scope really provides them.
    model = BallTree(X, leaf_size=leaf_size, metric=metric, **kwargs)
    model.fit(train[0], train[1])
    model_name = 'Ball Tree'

    y_hat = model.predict(test[0])
    if metrics == 'accuracy':
        accuracy = accuracy_score(test[1], y_hat)
    elif metrics == 'f1':
        accuracy = f1_score(test[1], y_hat)
    else:  # 'jaccard' -- the only remaining supported name
        accuracy = jaccard_score(test[1], y_hat)

    if x_predict is None:
        return (model_name, accuracy, None)

    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
def kde_logarithmic_knn(self, X: np.ndarray, **kwargs) -> np.ndarray:
    """
    Compute a shifted k-nearest-neighbour log-density value for every row of ``X``.

    For each row the value ``log(k) - log(n) - d * log(log_regularizer + r)`` is
    computed, where ``n = X.shape[0]``, ``d = X.shape[1]``, ``k`` is the number of
    neighbours and ``r`` is the last entry of the distance row returned when that
    row is used as the query point (presumably the k-th nearest distance, since
    scikit-learn sorts query results by default). The values are then shifted
    upwards by ``|min|`` whenever the smallest one is negative.

    Parameters
    ----------
    X
        Sample matrix; both ``X.shape[0]`` and ``X.shape[1]`` are read.
    kwargs
        knn_algorithm : str, default ``'BallTree'``
            ``'BallTree'`` or ``'NearestNeighbors'``.
        log_regularizer : float, default ``0.0``
            Added to the neighbour distance before taking the logarithm.
        leaf_size : int, default ``6``
            Forwarded to the neighbour structure.
        knn_n_neighbors : int, default ``6``
            Number of neighbours ``k`` used in the query.

    Returns
    -------
    np.ndarray
        One value per row of ``X``; no entry is negative after the shift.

    Raises
    ------
    AssertionError
        If ``knn_algorithm`` is not one of the allowed names.
    """
    # -- constants --
    _allowed_algorithms = {'BallTree', 'NearestNeighbors'}

    # -- defaults --
    _algorithm = kwargs.get('knn_algorithm', 'BallTree')
    if _algorithm not in _allowed_algorithms:
        # Raised explicitly instead of via `assert` so the check survives
        # `python -O`; the exception type and message are kept for callers.
        raise AssertionError(f'The algorithm name you provided: {_algorithm} is not among '
                             f'allowed knn algorithms: {_allowed_algorithms}')

    _log_regularizer = kwargs.get('log_regularizer', 0.0)
    _leaf_size = kwargs.get('leaf_size', 6)
    _n_neighbors = kwargs.get('knn_n_neighbors', 6)

    # -- code --
    # Query all samples in one call instead of one call per row: the distances
    # are the same, but the loop now runs inside the library.
    if _algorithm == 'BallTree':
        # create the local graph
        _graph = BallTree(X, leaf_size=_leaf_size)
        _distances = _graph.query(X, k=_n_neighbors)[0]
    elif _algorithm == 'NearestNeighbors':
        # Leave one core free, but never request fewer than one worker:
        # os.cpu_count() may return None, and `cpu_count() - 1` is 0 on a
        # single-core host, which is not a valid n_jobs value.
        _n_jobs = max(1, (os.cpu_count() or 1) - 1)
        # create the local graph
        _graph = NearestNeighbors(n_neighbors=_n_neighbors, leaf_size=_leaf_size,
                                  algorithm='ball_tree', p=2, n_jobs=_n_jobs)
        # fit graph
        _graph.fit(X)
        _distances = _graph.kneighbors(X=X, n_neighbors=_n_neighbors)[0]
    else:
        raise ValueError('Something horrible has happened, contact the project owner!')

    # last column = distance stored last in each neighbour row
    _last_distances = np.asarray(_distances)[:, -1]
    _densities = np.log(_n_neighbors) - np.log(X.shape[0]) - \
        X.shape[1] * np.log(_log_regularizer + _last_distances)

    # it might happen that the log-density is negative, so it must be shifted
    return _densities + np.abs(np.min((0.0, np.min(_densities))))
def predict(self, X):
    """
    Map every query row of ``X`` to an entry of ``self.labels_``.

    A ``BallTree`` is fitted on ``self.cluster_centers_`` and queried with ``X``.
    The second element of the query result is iterated row by row; each row must
    hold exactly one index, which is used to look up ``self.labels_``.
    (Presumably that index is the nearest cluster centre -- confirm against the
    ``BallTree`` implementation in scope.)

    Returns
    -------
    list
        One label per row of the index array returned by the query.
    """
    tree = BallTree()
    tree.fit(self.cluster_centers_)
    _distances, nearest = tree.query(X)
    # `(idx,)` enforces the single-index-per-row shape, as the original loop did
    return [self.labels_[idx] for (idx,) in nearest]