class KNNRegressor:
    """k-nearest-neighbours regressor with uniform or inverse-distance weights.

    Two search back ends are supported:

    * ``strategy='my_own'`` -- a brute-force search over a full distance
      matrix.  ``metric`` may be ``'euclidean'`` or ``'cosine'`` (delegated to
      the ``distances`` module) or a callable that is invoked as
      ``metric(training_data, x)`` and whose result is transposed, so it is
      expected to return one row per training sample.
    * any other ``strategy`` -- forwarded as ``algorithm`` to
      ``NearestNeighbors``.

    Invalid arguments raise ``AttributeError`` (kept for backward
    compatibility with existing callers).
    """

    def __init__(self, k, strategy='my_own', metric='euclidean', mode='uniform'):
        """Validate and store the hyper-parameters.

        Args:
            k: number of neighbours, an ``int`` >= 1.
            strategy: ``'my_own'`` or a ``NearestNeighbors`` algorithm name.
            metric: metric name or callable (see class docstring).
            mode: ``'uniform'`` for a plain mean of the neighbour labels,
                ``'distance'`` for an inverse-distance weighted mean.

        Raises:
            AttributeError: if ``k`` or ``mode`` is invalid.
        """
        if not isinstance(k, int) or k < 1:
            raise AttributeError('Incorrect "k" parameter')
        if not isinstance(mode, str) or mode not in ('uniform', 'distance'):
            raise AttributeError('Mode parameter can be uniform or distance only')
        self.mode = mode
        self.k = k
        self.strategy = strategy
        self.metric = metric
        self.training_labels = None
        if strategy == 'my_own':
            self.training_data = None
        else:
            self.nn = NearestNeighbors(n_neighbors=k, algorithm=strategy,
                                       leaf_size=30, metric=metric)

    def fit(self, x, y):
        """Store the training set (or fit the wrapped index) and its labels.

        Raises:
            AttributeError: if ``x`` and ``y`` differ in their first dimension.
        """
        if x.shape[0] != y.shape[0]:
            raise AttributeError('Mismatch between training set and its labels')
        self.training_labels = y
        if self.strategy == 'my_own':
            self.training_data = x
        else:
            self.nn.fit(x)

    def find_kneighbors(self, x, return_distance=True):
        """Return the ``k`` nearest training samples for every row of ``x``.

        Args:
            x: query samples.
            return_distance: when ``True`` return ``(distances, indices)``,
                otherwise only ``indices``.

        Returns:
            For ``'my_own'``: arrays with one row per query and ``k`` columns,
            ordered from nearest to farthest; ties go to the lower training
            index.  For other strategies: whatever
            ``NearestNeighbors.kneighbors`` returns.

        Raises:
            AttributeError: if ``return_distance`` is not a ``bool``.
        """
        if not isinstance(return_distance, bool):
            raise AttributeError('Incorrect "return_distance" parameter')
        if self.strategy != 'my_own':
            return self.nn.kneighbors(x, return_distance=return_distance)

        nn_dist, nn_index = self._select_nearest(self._pairwise_distances(x), self.k)
        if return_distance:
            return nn_dist, nn_index
        return nn_index

    def predict(self, x, k=None):
        """Predict a target value for every row of ``x``.

        Args:
            x: query samples.
            k: optional new neighbour count.  It is not a per-call override:
                it replaces the stored setting (``self.k`` for ``'my_own'``,
                ``n_neighbors`` of the wrapped index otherwise) and therefore
                also applies to later calls.

        Returns:
            One prediction per query row.

        Raises:
            AttributeError: if ``k`` is given but is not an ``int`` >= 1.
        """
        if k is not None:
            if not isinstance(k, int) or k < 1:
                raise AttributeError('Incorrect "k" parameter')
            if self.strategy == 'my_own':
                self.k = k
            else:
                self.nn.set_params(n_neighbors=k)

        if self.mode == 'uniform':
            neighbor_labels = self.training_labels[
                self.find_kneighbors(x, return_distance=False)]
            return np.mean(neighbor_labels, axis=1)

        nn_dist, nn_index = self.find_kneighbors(x)
        neighbor_labels = self.training_labels[nn_index]
        # Plain array arithmetic instead of np.vectorize: same values, no
        # per-element Python call, and it also works for an empty query batch
        # (np.vectorize raises on size-0 input).  The small constant keeps the
        # weight finite when a query coincides with a training sample.
        weights = 1.0 / (np.asarray(nn_dist, dtype=np.float64) + 0.00001)
        return np.sum(neighbor_labels * weights, axis=1) / np.sum(weights, axis=1)

    def _pairwise_distances(self, x):
        """Build the query-by-training distance matrix for the 'my_own' search."""
        if self.metric == 'euclidean':
            return distances.euclidean_distance(x, self.training_data)
        if self.metric == 'cosine':
            return distances.cosine_distance(x, self.training_data)
        # A callable metric is invoked with the training data first, hence the
        # transpose to get one row per query.
        return self.metric(self.training_data, x).astype(np.float64).T

    @staticmethod
    def _select_nearest(dist_matrix, k):
        """Pick the ``k`` smallest entries of every row, nearest first.

        ``dist_matrix`` is modified in place: every selected entry is
        overwritten with ``inf`` so the next pass finds the following
        neighbour.
        """
        n_queries = dist_matrix.shape[0]
        rows = np.arange(n_queries)
        nn_dist = np.empty((n_queries, k), dtype=dist_matrix.dtype)
        nn_index = np.empty((n_queries, k), dtype=np.int64)
        for col in range(k):
            nearest = np.argmin(dist_matrix, axis=1)
            nn_index[:, col] = nearest
            nn_dist[:, col] = dist_matrix[rows, nearest]
            dist_matrix[rows, nearest] = np.inf
        return nn_dist, nn_index
# Benchmark script: fits a brute-force scikit-learn NearestNeighbors index on
# a stored feature collection and times one k-NN query against it.
# NOTE(review): `np`, `sys` and `time` are used below but are not imported in
# the lines visible here -- confirm they are imported elsewhere in the file.
from sklearn.preprocessing import normalize
from sklearn.neighbors import NearestNeighbors
from libretrieval.features.io import load_features

# Number of query rows used for the timed search (first k rows of q_features).
k = 10

# Collection features; the L2-normalised copy is computed but not used by any
# statement visible in this script.
features = load_features("/home/alberto/phD/datasets/places365/vgg16/features/collection/")
features_l2 = normalize(features, 'l2', axis=1)

# Variance along axis 0 accumulated in float64; only its dtype is printed.
# NOTE(review): the name `vars` shadows the Python builtin of the same name.
vars = np.var(features, axis=0, dtype=np.float64)
print(vars.dtype)

# Query features; as above, the normalised copy is not used below.
q_features = load_features("/home/alberto/phD/datasets/places365/vgg16/features/query/")
q_features_l2 = normalize(q_features, 'l2', axis=1)

# The distance metric is taken from the first command-line argument.
brute_ngbrs = NearestNeighbors(n_neighbors=10000, algorithm='brute', metric=sys.argv[1], n_jobs=1)
_ = brute_ngbrs.fit(features)
print(brute_ngbrs.get_params())

# Timed section: only the kneighbors() call (plus the label print) falls
# between the two perf_counter() readings.
ts = time.perf_counter()
# end='' keeps the elapsed time on the same output line as this label.
print("Brute Query -- ", end='', flush=False)
dists_sk, indices_sk = brute_ngbrs.kneighbors(q_features[0:k], n_neighbors=10000)
print(" Elapsed {0:0.8f}s".format(time.perf_counter() - ts))
class Wolpertinger(object):
    """Discrete action set plus a k-nearest-neighbour index over it.

    The action set is either supplied by the caller or built by sampling each
    dimension of ``env.action_space`` with ``np.linspace``.  The neighbour
    index is backed by pyflann when ``FLAGS.knn_backend == 'pyflann'`` and by
    scikit-learn otherwise.
    """

    def __init__(self, env, i=1, nn_index_file=None, action_set=None):
        """Build the action set and create or load the neighbour index.

        Args:
            env: environment; its ``action_space`` (``shape``, ``low``,
                ``high``) is read only when ``action_set`` is None.
            i: number of samples per action dimension.  A value of 1 is
                bumped to 2 so that both bounds are included.
            nn_index_file: when not None, overrides ``FLAGS.nn_index_file``
                (this writes to the global FLAGS object).
            action_set: optional ready-made array of discrete actions.
        """
        FLAGS.nn_index_file = (nn_index_file if nn_index_file is not None
                               else FLAGS.nn_index_file)
        self.__env = env
        self.__flann_params = None

        if action_set is None:
            # Discretization of the action set.
            self.__action_space = env.action_space
            shape = self.__action_space.shape[0]
            lower = self.__action_space.low
            higher = self.__action_space.high
            i = i + 1 if i == 1 else i
            self.__A = np.zeros((i, shape), dtype=lower.dtype)
            for d in range(shape):
                low = lower[d]
                high = higher[d]
                self.__A[..., d] = np.linspace(low, high, dtype=low.dtype, num=i)
        else:
            # Custom action set.
            self.__A = action_set

        if FLAGS.knn_backend == 'pyflann':
            self._use_pyflann()
        else:
            self._use_sklean()

    def _use_pyflann(self):
        """Create or load the pyflann index over the action set.

        Raises:
            Exception: if ``FLAGS.create_index`` is false and no index file
                is configured.
        """
        self.__flann = fl.FLANN()
        fl.set_distance_type('euclidean')  # L2 norm distance
        if FLAGS.create_index:
            self.__flann_params = self.__flann.build_index(
                self.__A,
                algorithm="autotuned",
                target_precision=FLAGS.nn_target_precision,
                log_level="info")
            print(
                'KNN index created with auto-tuned configuration params: {}.'.
                format(str(self.__flann_params)))
            if FLAGS.nn_index_file is not None:
                self.__flann.save_index(FLAGS.nn_index_file)
                print('KNN index file stored in {}'.format(
                    FLAGS.nn_index_file))
        elif FLAGS.nn_index_file is not None:
            self.__flann.load_index(FLAGS.nn_index_file, self.__A)
        else:
            raise Exception(
                "Error in parameter configuration. Index was not created/loaded"
            )

        print(
            "Wolpertinger policy configuration: \n " +
            "Create index: {} \n Target Precision: {} \n knn: {} \n Index File: {} \n # points: {} \n Checks: {}"
            .format(
                FLAGS.create_index, FLAGS.nn_target_precision, FLAGS.knn,
                FLAGS.nn_index_file, self.__A.shape[0],
                self.__flann_params["checks"]
                if self.__flann_params else FLAGS.knn_checks))

    def _use_sklean(self):
        """Create (and dump) or load the scikit-learn NearestNeighbors index.

        The model is persisted to / restored from ``knn-model.pkl`` in the
        current working directory.
        """
        if FLAGS.create_index:
            self.__alg = NearestNeighbors(algorithm='auto',
                                          leaf_size=30,
                                          metric='euclidean',
                                          n_jobs=-1)
            self.__alg.fit(self.__A)
            joblib.dump(self.__alg, 'knn-model.pkl')
        else:
            self.__alg = joblib.load('knn-model.pkl')

        # Call-form print with a single argument prints the same text on
        # Python 2 and Python 3 and matches the other prints in this class.
        print("Knn index {} using sklearn backend. Configuration params: {}".format(
            "created" if FLAGS.create_index else "loaded",
            self.__alg.get_params()))

    def g(self, action, k=None):
        """Return the discrete actions nearest to a continuous action.

        Args:
            action: continuous action to look up.
            k: number of neighbours; defaults to ``FLAGS.knn``.

        Returns:
            A list built by indexing the action set with each entry of the
            neighbour-index result.
        """
        nearest_neighbors = k if k is not None else FLAGS.knn
        checks = self.__flann_params[
            "checks"] if self.__flann_params else FLAGS.knn_checks

        if FLAGS.knn_backend == "pyflann":
            results, dists = self.__flann.nn_index(action,
                                                   nearest_neighbors,
                                                   checks=checks)
        else:
            dists, results = self.__alg.kneighbors([action], nearest_neighbors)
            # The query was wrapped in a one-element batch; unwrap its row.
            # NOTE(review): the flattened source does not show whether this
            # unwrapping was meant for the pyflann branch as well -- confirm.
            results = results[0]

        return [self.__A[val] for val in results]

    @property
    def discrete_actions(self):
        """The array of discrete actions backing the index."""
        return self.__A