Example #1
0
class KNNRegressor:
    """k-nearest-neighbours regressor.

    With ``strategy='my_own'`` the full query-to-training distance matrix is
    computed and the ``k`` smallest entries of every row are picked by hand.
    Any other ``strategy`` value is passed to ``NearestNeighbors`` as its
    ``algorithm`` and the search is delegated to that object.
    """

    # Added to every distance before inversion so a zero distance does not
    # divide by zero in 'distance' mode.
    _WEIGHT_EPS = 0.00001

    def __init__(self, k, strategy='my_own', metric='euclidean', mode='uniform'):
        """Validate and store the configuration.

        Args:
            k: number of neighbours, an ``int`` >= 1.
            strategy: ``'my_own'`` for the built-in search, otherwise the
                ``algorithm`` argument given to ``NearestNeighbors``.
            metric: ``'euclidean'``, ``'cosine'`` or, for ``'my_own'``, a
                callable invoked as ``metric(training_data, x)``.
            mode: ``'uniform'`` (plain mean of neighbour labels) or
                ``'distance'`` (mean weighted by inverse distance).

        Raises:
            AttributeError: if ``k`` or ``mode`` is invalid.
        """
        self._check_k(k)
        if not isinstance(mode, str) or mode not in ('uniform', 'distance'):
            raise AttributeError('Mode parameter can be uniform or distance only')

        self.mode = mode
        self.k = k
        self.strategy = strategy
        self.metric = metric
        self.training_labels = None
        if strategy == 'my_own':
            self.training_data = None
        else:
            self.nn = NearestNeighbors(n_neighbors=k, algorithm=strategy, leaf_size=30, metric=metric)

    @staticmethod
    def _check_k(k):
        """Raise AttributeError unless ``k`` is an int >= 1."""
        if not isinstance(k, int) or k < 1:
            raise AttributeError('Incorrect "k" parameter')

    def fit(self, x, y):
        """Store the training set (or fit the delegated index).

        Raises:
            AttributeError: if ``x`` and ``y`` differ in their first dimension.
        """
        if x.shape[0] != y.shape[0]:
            raise AttributeError('Mismatch between training set and its labels')

        self.training_labels = y
        if self.strategy == 'my_own':
            self.training_data = x
        else:
            self.nn.fit(x)

    def _distance_matrix(self, x):
        """Return the (n_queries, n_train) distance matrix for the built-in search."""
        if self.metric == 'euclidean':
            return distances.euclidean_distance(x, self.training_data)
        if self.metric == 'cosine':
            return distances.cosine_distance(x, self.training_data)
        # A custom metric is called with the training data first, so its
        # result is transposed to put queries on the rows.
        return self.metric(self.training_data, x).astype(np.float64).T

    @staticmethod
    def _k_smallest(dist_matrix, k):
        """Pick the ``k`` smallest entries of every row, nearest first.

        ``dist_matrix`` is modified in place: every selected entry is
        overwritten with ``inf`` so the next pass finds the next neighbour.
        Ties go to the lowest column index (``np.argmin`` semantics).

        Returns:
            ``(distances, indices)``, both of shape (n_queries, k).
        """
        n_queries = dist_matrix.shape[0]
        rows = np.arange(n_queries)
        neigh_dist = np.empty((n_queries, k), dtype=dist_matrix.dtype)
        neigh_index = np.empty((n_queries, k), dtype=np.int64)
        for col in range(k):
            nearest = np.argmin(dist_matrix, axis=1)
            neigh_index[:, col] = nearest
            neigh_dist[:, col] = dist_matrix[rows, nearest]
            dist_matrix[rows, nearest] = np.inf
        return neigh_dist, neigh_index

    def find_kneighbors(self, x, return_distance=True):
        """Find the ``self.k`` nearest training samples for every row of ``x``.

        Args:
            x: query samples.
            return_distance: when True return ``(distances, indices)``,
                otherwise only ``indices``.

        Raises:
            AttributeError: if ``return_distance`` is not a bool.
            ValueError: (built-in search only) if ``self.k`` exceeds the
                number of training samples. Without this check the exhausted
                rows would be padded with index 0 and corrupt predictions.
        """
        if not isinstance(return_distance, bool):
            raise AttributeError('Incorrect "return_distance" parameter')

        if self.strategy != 'my_own':
            return self.nn.kneighbors(x, return_distance=return_distance)

        dist_matrix = self._distance_matrix(x)
        n_train = dist_matrix.shape[1]
        if self.k > n_train:
            raise ValueError(
                'Expected k <= number of training samples, got k = {}, samples = {}'.format(self.k, n_train))

        neigh_dist, neigh_index = self._k_smallest(dist_matrix, self.k)
        if return_distance:
            return neigh_dist, neigh_index
        return neigh_index

    def predict(self, x, k=None):
        """Predict a value for every row of ``x``.

        Args:
            x: query samples.
            k: optional new number of neighbours. When given it replaces the
                stored value and stays in effect for later calls.

        Returns:
            One prediction per query: the mean of the neighbours' labels
            ('uniform') or their inverse-distance weighted mean ('distance').

        Raises:
            AttributeError: if ``k`` is given and is not an int >= 1.
        """
        if k is not None:
            self._check_k(k)
            self.k = k
            if self.strategy != 'my_own':
                self.nn.set_params(n_neighbors=k)

        if self.mode == 'uniform':
            neighbor_labels = self.training_labels[self.find_kneighbors(x, return_distance=False)]
            return np.mean(neighbor_labels, axis=1)

        nn_dist, nn_index = self.find_kneighbors(x)
        neighbor_labels = self.training_labels[nn_index]
        weights = 1.0 / (nn_dist + self._WEIGHT_EPS)
        return np.sum(neighbor_labels * weights, axis=1) / np.sum(weights, axis=1)
Example #2
0
import sys
import time

import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import normalize

from libretrieval.features.io import load_features


# Benchmark script: times one brute-force NearestNeighbors query.
# Usage: pass the distance metric name as the first command-line argument.

# Check the command line first so a missing argument fails before the
# feature files are loaded.
if len(sys.argv) < 2:
    sys.exit("usage: {} <metric>".format(sys.argv[0]))
metric = sys.argv[1]

k = 10  # number of query rows sent to the index (q_features[0:k])
n_neighbors = 10000  # neighbours requested per query

features = load_features("/home/alberto/phD/datasets/places365/vgg16/features/collection/")
# NOTE(review): the L2-normalized copies are not used in the visible part of
# this script; kept because later code may rely on them.
features_l2 = normalize(features, 'l2', axis=1)

# Per-dimension variance of the collection features, accumulated in float64.
feature_vars = np.var(features, axis=0, dtype=np.float64)
print(feature_vars.dtype)

q_features = load_features("/home/alberto/phD/datasets/places365/vgg16/features/query/")
q_features_l2 = normalize(q_features, 'l2', axis=1)


brute_ngbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='brute', metric=metric, n_jobs=1)
_ = brute_ngbrs.fit(features)


print(brute_ngbrs.get_params())
ts = time.perf_counter()
print("Brute Query -- ", end='', flush=False)
dists_sk, indices_sk = brute_ngbrs.kneighbors(q_features[0:k], n_neighbors=n_neighbors)
print("  Elapsed {0:0.8f}s".format(time.perf_counter() - ts))
Example #3
0
class Wolpertinger(object):
    """Maps a continuous action onto its k nearest entries of a discrete action set.

    The discrete set is either supplied by the caller or built as an evenly
    spaced grid over the environment's action space. The neighbour lookup
    uses pyflann when ``FLAGS.knn_backend == 'pyflann'`` and sklearn's
    ``NearestNeighbors`` otherwise.
    """

    def __init__(self, env, i=1, nn_index_file=None, action_set=None):
        """
        Build (or accept) the discrete action set and prepare the kNN backend.

        Args:
            env: environment; its ``action_space`` (``shape``, ``low``,
                ``high``) is read when ``action_set`` is None.
            i: number of grid points per action dimension; 1 is bumped to 2.
            nn_index_file: when not None, overrides ``FLAGS.nn_index_file``.
            action_set: optional ready-made action set used as-is.
        """
        FLAGS.nn_index_file = nn_index_file if nn_index_file is not None else FLAGS.nn_index_file
        self.__env = env
        self.__flann_params = None

        if action_set is None:
            # Discretization of the action set
            self.__action_space = env.action_space
            self.__A = self._discretize(self.__action_space, i)
        else:  # custom action set
            self.__A = action_set

        if FLAGS.knn_backend == 'pyflann':
            self._use_pyflann()
        else:
            self._use_sklean()

    @staticmethod
    def _discretize(action_space, i):
        """
        Build an evenly spaced grid between the low and high action bounds.

        Args:
            action_space: object exposing ``shape``, ``low`` and ``high``.
            i: number of points per dimension; 1 is bumped to 2 so that both
                bounds are part of the grid.

        Returns:
            Array of shape (points, dims); column d runs from ``low[d]`` to
            ``high[d]``.
        """
        dims = action_space.shape[0]
        lower = action_space.low
        higher = action_space.high

        points = i + 1 if i == 1 else i
        grid = np.zeros((points, dims), dtype=lower.dtype)
        for d in range(dims):
            low = lower[d]
            grid[:, d] = np.linspace(low, higher[d], dtype=low.dtype, num=points)
        return grid

    def _use_pyflann(self):
        """
        Use Pyflann library for Knn index generation

        Builds (and optionally saves) a new index when ``FLAGS.create_index``
        is set, otherwise loads it from ``FLAGS.nn_index_file``.

        Raises:
            Exception: if no index is created and no index file is configured.
        """
        self.__flann = fl.FLANN()
        fl.set_distance_type('euclidean')  # L2 norm distance

        if FLAGS.create_index:
            self.__flann_params = self.__flann.build_index(
                self.__A,
                algorithm="autotuned",
                target_precision=FLAGS.nn_target_precision,
                log_level="info")

            print(
                'KNN index created with auto-tuned configuration params: {}.'.
                format(str(self.__flann_params)))

            if FLAGS.nn_index_file is not None:
                self.__flann.save_index(FLAGS.nn_index_file)
                print('KNN index file stored in {}'.format(
                    FLAGS.nn_index_file))

        elif FLAGS.nn_index_file is not None:
            self.__flann.load_index(FLAGS.nn_index_file, self.__A)
        else:
            raise Exception(
                "Error in parameter configuration. Index was not created/loaded"
            )

        # A loaded index has no tuned params, so the configured checks value
        # is reported instead.
        checks = (self.__flann_params["checks"]
                  if self.__flann_params else FLAGS.knn_checks)
        print(
            "Wolpertinger policy configuration: \n " +
            "Create index: {} \n Target Precision: {} \n knn: {} \n Index File: {} \n # points: {} \n Checks: {}"
            .format(
                FLAGS.create_index, FLAGS.nn_target_precision, FLAGS.knn,
                FLAGS.nn_index_file, self.__A.shape[0], checks))

    def _use_sklean(self):
        """
        Use sklearn lib for NearestNeighbors index generation

        Fits and dumps a new model to 'knn-model.pkl' when
        ``FLAGS.create_index`` is set, otherwise loads that file.
        """
        if FLAGS.create_index:
            self.__alg = NearestNeighbors(algorithm='auto',
                                          leaf_size=30,
                                          metric='euclidean',
                                          n_jobs=-1)
            self.__alg.fit(self.__A)
            joblib.dump(self.__alg, 'knn-model.pkl')
        else:
            # NOTE(security): joblib.load unpickles the file; only load a
            # 'knn-model.pkl' that comes from a trusted source.
            self.__alg = joblib.load('knn-model.pkl')

        # Call form of print, consistent with the rest of the class and valid
        # on Python 3.
        print("Knn index {} using sklearn backend. Configuration params: {}".format(
            "created" if FLAGS.create_index else "loaded",
            self.__alg.get_params()))

    def g(self, action, k=None):
        '''
        Function g that returns the k-nearest-neighbors for a given continuous action
        Args:
            action: continuous action
            k: number of neighbors; defaults to FLAGS.knn when None

        Returns:
            k-nearest-neighbors, as a list of entries of the discrete action set
        '''
        nearest_neighbors = k if k is not None else FLAGS.knn

        if FLAGS.knn_backend == "pyflann":
            # Only the pyflann query takes a "checks" value.
            checks = (self.__flann_params["checks"]
                      if self.__flann_params else FLAGS.knn_checks)
            results, _ = self.__flann.nn_index(action,
                                               nearest_neighbors,
                                               checks=checks)
        else:
            _, results = self.__alg.kneighbors([action], nearest_neighbors)
            results = results[0]  # single query: take its row of indices
        return [self.__A[val] for val in results]

    @property
    def discrete_actions(self):
        """The discrete action set used for the neighbour lookup."""
        return self.__A