Example #1
import math
import numpy
from cvxopt import matrix, solvers
from sklearn.metrics import pairwise as sk


def kmm(Xtrain, Xtest, sigma):
    n_tr = len(Xtrain)
    n_te = len(Xtest)

    # calculate Kernel
    print('Computing kernel for training data ...')
    K_ns = sk.rbf_kernel(Xtrain, Xtrain, sigma)  # rbf_kernel's third positional argument is gamma
    # make it symmetric by averaging with the transpose
    K = 0.5 * (K_ns + K_ns.transpose())

    # calculate kappa
    print('Computing kernel for kappa ...')
    kappa_r = sk.rbf_kernel(Xtrain, Xtest, sigma)
    ones = numpy.ones(shape=(n_te, 1))
    kappa = numpy.dot(kappa_r, ones)
    kappa = -(float(n_tr) / float(n_te)) * kappa

    # calculate eps
    eps = (math.sqrt(n_tr) - 1) / math.sqrt(n_tr)

    # constraints
    A0 = numpy.ones(shape=(1, n_tr))
    A1 = -numpy.ones(shape=(1, n_tr))
    A = numpy.vstack([A0, A1, -numpy.eye(n_tr), numpy.eye(n_tr)])
    b = numpy.array([[n_tr * (eps + 1), n_tr * (eps - 1)]])
    b = numpy.vstack([b.T, numpy.zeros(shape=(n_tr, 1)), numpy.ones(shape=(n_tr, 1)) * 1000])

    print('Solving quadratic program for beta ...')
    P = matrix(K, tc='d')
    q = matrix(kappa, tc='d')
    G = matrix(A, tc='d')
    h = matrix(b, tc='d')
    beta = solvers.qp(P, q, G, h)
    return [i for i in beta['x']]
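
A minimal usage sketch for kmm on synthetic covariate-shift data (the shapes, seed and sigma below are illustrative assumptions, not part of the original snippet):

numpy.random.seed(0)
Xtrain = numpy.random.randn(200, 2)        # source sample
Xtest = numpy.random.randn(100, 2) + 0.5   # shifted target sample
weights = kmm(Xtrain, Xtest, sigma=1.0)    # one importance weight per training point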
Example #2
 def _get_kernel(self, X, y=None):
     if self.kernel == "rbf":
         if y is None:
             return rbf_kernel(X, X, gamma=self.gamma)
         else:
             return rbf_kernel(X, y, gamma=self.gamma)
     elif self.kernel == "knn":
         if self.nn_fit is None:
             self.nn_fit = NearestNeighbors(n_neighbors=self.n_neighbors).fit(X)
         if y is None:
             # Nearest neighbors returns a directed matrix.
             dir_graph = self.nn_fit.kneighbors_graph(self.nn_fit._fit_X,
                                                      self.n_neighbors,
                                                      mode='connectivity')
             # Making the matrix symmetric
             un_graph = dir_graph + dir_graph.T
             # Since it is a connectivity matrix, all values should be
             # either 0 or 1
             un_graph[un_graph > 1.0] = 1.0
             return un_graph
         else:
             return self.nn_fit.kneighbors(y, return_distance=False)
     else:
         raise ValueError("%s is not a valid kernel. Only rbf and knn"
                          " are supported at this time" % self.kernel)
Example #3
    def _Gram(self, X):
        if X is self.X:
            if self.Gs_train is None:
                kernel_scalar = rbf_kernel(self.X, gamma=self.gamma)[:, :,
                                                                     newaxis,
                                                                     newaxis]
                delta = subtract(X.T[:, newaxis, :], self.X.T[:, :, newaxis])
                self.Gs_train = asarray(transpose(
                    2 * self.gamma * kernel_scalar *
                    (2 * self.gamma * (delta[:, newaxis, :, :] *
                                       delta[newaxis, :, :, :]).transpose(
                        (3, 2, 0, 1)) +
                        ((self.p - 1) - 2 * self.gamma *
                         _norm_axis_0(delta)[:, :, newaxis, newaxis]**2) *
                        eye(self.p)[newaxis, newaxis, :, :]), (0, 2, 1, 3)
                )).reshape((self.p * X.shape[0], self.p * self.X.shape[0]))
            return self.Gs_train

        kernel_scalar = rbf_kernel(X, self.X, gamma=self.gamma)[:, :,
                                                                newaxis,
                                                                newaxis]
        delta = subtract(X.T[:, newaxis, :], self.X.T[:, :, newaxis])
        return asarray(transpose(
            2 * self.gamma * kernel_scalar *
            (2 * self.gamma * (delta[:, newaxis, :, :] *
                               delta[newaxis, :, :, :]).transpose(
                (3, 2, 0, 1)) +
                ((self.p - 1) - 2 * self.gamma *
                 _norm_axis_0(delta).T[:, :, newaxis, newaxis]**2) *
                eye(self.p)[newaxis, newaxis, :, :]), (0, 2, 1, 3)
        )).reshape((self.p * X.shape[0], self.p * self.X.shape[0]))
Example #4
def hsic(x,y,sigma):
    """Compute HSIC between two random variables

    Parameters
    ----------
    x : array, shape (m, 1)
        vector containing m observations of the first random variable
    y : array, shape (m, 1)
        vector containing m observations of the second random variable
    sigma : float
        scale parameter for the Gaussian kernel (gamma = 1 / (2 * sigma**2))

    Returns
    -------
    hsic_value : float, HSIC value of the two input random variables
    """
    # m is the number of observations here
    m = len(x)
    gamma = 1.0/(2*sigma**2)

    k = rbf_kernel(x,x,gamma)
    l = rbf_kernel(y,y,gamma)
    for i in range(m):
        k[i,i] = 0
        l[i,i] = 0
    h = np.eye(m)-1.0/m
    hsic_value = (1.0/(m-1)**2)*np.trace(np.dot(np.dot(np.dot(k,h),l),h))
    return hsic_value
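
A short usage sketch (synthetic data; the sigma value is an illustrative assumption, and numpy/rbf_kernel are assumed imported as in the snippet):

rng = np.random.RandomState(0)
x = rng.randn(100, 1)
y = 2 * x + 0.1 * rng.randn(100, 1)   # strongly dependent on x
print(hsic(x, y, sigma=1.0))          # noticeably larger than for independent x and y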
Example #5
    def fit(self, X):
        A = tools.kmeans_centroids(X, self.n_prototypes).cluster_centers_
        self.W = rbf_kernel(A, A, gamma = 1./self.sigma2)
        self.H = rbf_kernel(X, A, gamma = 1./self.sigma2)

        self.W_dagger = np.linalg.pinv(self.W)

        d_tilde = self.H.dot(self.W_dagger.dot(self.H.T.sum(axis=1)))
        self.HtH = self.H.T.dot(self.H)
        self.HtSH = (self.H.T * d_tilde).dot(self.H) - self.HtH.dot(self.W_dagger).dot(self.HtH.T)
        self.n = X.shape[0]
Example #6
def test_pairwise_kernels():
    """ Test the pairwise_kernels helper function. """

    def callable_rbf_kernel(x, y, **kwds):
        """ Callable version of pairwise.rbf_kernel. """
        K = rbf_kernel(np.atleast_2d(x), np.atleast_2d(y), **kwds)
        return K

    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((2, 4))
    # Test with all metrics that should be in PAIRWISE_KERNEL_FUNCTIONS.
    test_metrics = ["rbf", "sigmoid", "polynomial", "linear", "chi2",
                    "additive_chi2"]
    for metric in test_metrics:
        function = PAIRWISE_KERNEL_FUNCTIONS[metric]
        # Test with Y=None
        K1 = pairwise_kernels(X, metric=metric)
        K2 = function(X)
        assert_array_almost_equal(K1, K2)
        # Test with Y=Y
        K1 = pairwise_kernels(X, Y=Y, metric=metric)
        K2 = function(X, Y=Y)
        assert_array_almost_equal(K1, K2)
        # Test with tuples as X and Y
        X_tuples = tuple([tuple([v for v in row]) for row in X])
        Y_tuples = tuple([tuple([v for v in row]) for row in Y])
        K2 = pairwise_kernels(X_tuples, Y_tuples, metric=metric)
        assert_array_almost_equal(K1, K2)

        # Test with sparse X and Y
        X_sparse = csr_matrix(X)
        Y_sparse = csr_matrix(Y)
        if metric in ["chi2", "additive_chi2"]:
            # these don't support sparse matrices yet
            assert_raises(ValueError, pairwise_kernels,
                          X_sparse, Y=Y_sparse, metric=metric)
            continue
        K1 = pairwise_kernels(X_sparse, Y=Y_sparse, metric=metric)
        assert_array_almost_equal(K1, K2)
    # Test with a callable function, with given keywords.
    metric = callable_rbf_kernel
    kwds = {}
    kwds['gamma'] = 0.1
    K1 = pairwise_kernels(X, Y=Y, metric=metric, **kwds)
    K2 = rbf_kernel(X, Y=Y, **kwds)
    assert_array_almost_equal(K1, K2)

    # callable function, X=Y
    K1 = pairwise_kernels(X, Y=X, metric=metric, **kwds)
    K2 = rbf_kernel(X, Y=X, **kwds)
    assert_array_almost_equal(K1, K2)
Example #7
def compute_err(parameters):
    X,k_train,USV_k_train,n_SR,method,gamma,seed,k = parameters
    np.random.seed(seed)
    idx_SR = subsampling(X,int(n_SR),method)
    X_SR = X[idx_SR]
    k_SR = rbf_kernel(X_SR,X_SR,gamma=gamma)
    U_SR, S_SR, V_SR = np.linalg.svd(k_SR)
    USV_k_SR = [U_SR, S_SR, V_SR]
    k_train_SR = rbf_kernel(X,X_SR,gamma=gamma)
    rel_err, USV_k_SR = rel_approx_error(k_train, USV_k_SR, idx_SR)
    rel_acc = rel_approx_acc(k_train,USV_k_train,k_SR,USV_k_SR,idx_SR,k)
    quan_err = quan_error(X,X_SR)
    return [rel_err,quan_err,rel_acc]
Example #8
 def fit(self, X, y, unlabeled_data=None):
   num_data = X.shape[0] + unlabeled_data.shape[0]
   num_labeled = X.shape[0]
   num_unlabeled = unlabeled_data.shape[0]
   labeled = np.zeros((num_data,), dtype=np.float32)
   labeled[0:num_labeled] = 1.0
   if issparse(X):
     self.X_ = vstack((util.cast_to_float32(X),
                       util.cast_to_float32(unlabeled_data)), format='csr')
   else:
     self.X_ = np.concatenate((util.cast_to_float32(X),
                               util.cast_to_float32(unlabeled_data)))
   self.gamma = (
       self.gamma if self.gamma is not None else 1.0 / X.shape[1])
   self.kernel_params = {'gamma':self.gamma, 'degree':self.degree, 'coef0':self.coef0}
   kernel_matrix = pairwise_kernels(self.X_, metric=self.kernel,
                                    filter_params=True, **self.kernel_params)
   A = np.dot(np.diag(labeled), kernel_matrix)
   if self.nu2 != 0:
     if self.kernel == 'rbf':
       laplacian_kernel_matrix = kernel_matrix
     else:
       laplacian_kernel_matrix = rbf_kernel(self.X_, gamma=self.gamma)
     laplacian_x_kernel = np.dot(graph_laplacian(
         laplacian_kernel_matrix, normed=self.normalize_laplacian), kernel_matrix)
     A += self.nu2 * laplacian_x_kernel
   y = np.concatenate((y, -np.ones((num_unlabeled,), dtype=np.float32)),
                      axis=0)
   super(LapRLSC, self).fit(A, y, class_for_unlabeled=-1)
Example #9
    def __init__(self, *args, **kwargs):
        super(QUIRE, self).__init__(*args, **kwargs)
        self.Uindex = [idx for idx, _ in self.dataset.get_unlabeled_entries()]
        self.Lindex = [idx for idx in range(len(self.dataset)) if idx not in self.Uindex]
        self.lmbda = kwargs.pop("lambda", 1.0)
        X, self.y = zip(*self.dataset.get_entries())
        self.y = list(self.y)
        self.kernel = kwargs.pop("kernel", "rbf")
        if self.kernel == "rbf":
            self.K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop("gamma", 1.0))
        elif self.kernel == "poly":
            self.K = polynomial_kernel(
                X=X, Y=X, coef0=kwargs.pop("coef0", 1), degree=kwargs.pop("degree", 3), gamma=kwargs.pop("gamma", 1.0)
            )
        elif self.kernel == "linear":
            self.K = linear_kernel(X=X, Y=X)
        elif hasattr(self.kernel, "__call__"):
            self.K = self.kernel(X=np.array(X), Y=np.array(X))
        else:
            raise NotImplementedError

        if not isinstance(self.K, np.ndarray):
            raise TypeError("K should be an ndarray")
        if self.K.shape != (len(X), len(X)):
            raise ValueError("kernel should have size (%d, %d)" % (len(X), len(X)))
        self.L = np.linalg.inv(self.K + self.lmbda * np.eye(len(X)))
Example #10
def bourgain_embedding_matrix(distance_matrix):
    """Use Bourgain algorithm to embed the neural architectures based on their edit-distance.

    Args:
        distance_matrix: A matrix of edit-distances.

    Returns:
        A matrix of distances after embedding.

    """
    distance_matrix = np.array(distance_matrix)
    n = len(distance_matrix)
    if n == 1:
        return distance_matrix
    np.random.seed(123)
    distort_elements = []
    r = range(n)
    k = int(math.ceil(math.log(n) / math.log(2) - 1))
    t = int(math.ceil(math.log(n)))
    counter = 0
    for i in range(0, k + 1):
        # use distinct loop variables: the original shadowed t and s, which
        # shrank the inner range on every outer iteration
        for t_idx in range(t):
            s = np.random.choice(r, 2 ** i)
            for j in r:
                d = min([distance_matrix[j][si] for si in s])
                counter += len(s)
                if i == 0 and t_idx == 0:
                    distort_elements.append([d])
                else:
                    distort_elements[j].append(d)
    return rbf_kernel(distort_elements, distort_elements)
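
A usage sketch with a toy edit-distance matrix (the values are illustrative; numpy, math and rbf_kernel are assumed imported as in the snippet):

d = np.array([[0., 1., 4.],
              [1., 0., 2.],
              [4., 2., 0.]])
embedded_kernel = bourgain_embedding_matrix(d)   # (3, 3) RBF kernel over the embedded points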
Example #11
    def _apply_kernel(self, X, y=None):
        """Apply the selected kernel function to the data."""
        if self.kernel == 'linear':
            phi = linear_kernel(X, y)
        elif self.kernel == 'rbf':
            phi = rbf_kernel(X, y, gamma=self.gamma)
        elif self.kernel == 'poly':
            phi = polynomial_kernel(X, y, degree=self.degree)
        elif callable(self.kernel):
            phi = self.kernel(X, y)
            if len(phi.shape) != 2:
                raise ValueError(
                    "Custom kernel function did not return 2D matrix"
                )
            if phi.shape[0] != X.shape[0]:
                raise ValueError(
                    "Custom kernel function did not return matrix with rows"
                    " equal to number of data points."""
                )
        else:
            raise ValueError("Kernel selection is invalid.")
        phi = phi.T
        if self.bias_used:
            phi = np.hstack((np.ones((phi.shape[0], 1)), phi))

        return phi
Example #12
def test_svc_decision_function():
    """
    Test SVC's decision_function

    Sanity check, test that decision_function implemented in python
    returns the same as the one in libsvm

    """
    # multi class:
    clf = svm.SVC(kernel="linear", C=0.1).fit(iris.data, iris.target)

    dec = np.dot(iris.data, clf.coef_.T) + clf.intercept_

    assert_array_almost_equal(dec, clf.decision_function(iris.data))

    # binary:
    clf.fit(X, Y)
    dec = np.dot(X, clf.coef_.T) + clf.intercept_
    prediction = clf.predict(X)
    assert_array_almost_equal(dec.ravel(), clf.decision_function(X))
    assert_array_almost_equal(prediction, clf.classes_[(clf.decision_function(X) > 0).astype(int)])
    expected = np.array([-1.0, -0.66, -1.0, 0.66, 1.0, 1.0])
    assert_array_almost_equal(clf.decision_function(X), expected, 2)

    # kernel binary:
    clf = svm.SVC(kernel="rbf", gamma=1)
    clf.fit(X, Y)

    rbfs = rbf_kernel(X, clf.support_vectors_, gamma=clf.gamma)
    dec = np.dot(rbfs, clf.dual_coef_.T) + clf.intercept_
    assert_array_almost_equal(dec.ravel(), clf.decision_function(X))
Example #13
def test_nystroem_approximation():
    # some basic tests
    rnd = np.random.RandomState(0)
    X = rnd.uniform(size=(10, 4))

    # With n_components = n_samples this is exact
    X_transformed = Nystroem(n_components=X.shape[0]).fit_transform(X)
    K = rbf_kernel(X)
    assert_array_almost_equal(np.dot(X_transformed, X_transformed.T), K)

    trans = Nystroem(n_components=2, random_state=rnd)
    X_transformed = trans.fit(X).transform(X)
    assert_equal(X_transformed.shape, (X.shape[0], 2))

    # test callable kernel
    linear_kernel = lambda X, Y: np.dot(X, Y.T)
    trans = Nystroem(n_components=2, kernel=linear_kernel, random_state=rnd)
    X_transformed = trans.fit(X).transform(X)
    assert_equal(X_transformed.shape, (X.shape[0], 2))

    # test that available kernels fit and transform
    kernels_available = kernel_metrics()
    for kern in kernels_available:
        trans = Nystroem(n_components=2, kernel=kern, random_state=rnd)
        X_transformed = trans.fit(X).transform(X)
        assert_equal(X_transformed.shape, (X.shape[0], 2))
Example #14
def cartesian_affinities(data, distance = 2.0, sigma = 1.0):
    """
    Computes affinities between points using euclidean distance, and 
    sets to 0 all affinities for which the points are further than a certain
    threshold apart.

    Parameters
    ----------
    data : array, shape (N, M)
        N instances of M-dimensional data.
    distance : float
        Distance threshold, above which all affinities are set to 0.
    sigma : float
        Sigma used to compute affinities.

    Returns
    -------
    A : array, shape (N, N)
        Symmetric affinity matrix.
    """
    A = pairwise.rbf_kernel(data, data, gamma = (1.0 / (2 * (sigma ** 2))))
    if (distance > 0.0):
        distances = pairwise.pairwise_distances(data)
        A[np.where(distances > distance)] = 0.0
    return A
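
A quick usage sketch (random 2-D points; the threshold and sigma are illustrative, and numpy and sklearn.metrics.pairwise are assumed imported as in the snippet):

pts = np.random.RandomState(0).rand(50, 2)
A = cartesian_affinities(pts, distance=0.5, sigma=1.0)
assert A.shape == (50, 50) and np.allclose(A, A.T)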
Example #15
def test_decision_function():
    # Test decision_function
    # Sanity check, test that decision_function implemented in python
    # returns the same as the one in libsvm
    # multi class:
    clf = svm.SVC(kernel='linear', C=0.1,
                  decision_function_shape='ovo').fit(iris.data, iris.target)

    dec = np.dot(iris.data, clf.coef_.T) + clf.intercept_

    assert_array_almost_equal(dec, clf.decision_function(iris.data))

    # binary:
    clf.fit(X, Y)
    dec = np.dot(X, clf.coef_.T) + clf.intercept_
    prediction = clf.predict(X)
    assert_array_almost_equal(dec.ravel(), clf.decision_function(X))
    assert_array_almost_equal(
        prediction,
        clf.classes_[(clf.decision_function(X) > 0).astype(int)])
    expected = np.array([-1., -0.66, -1., 0.66, 1., 1.])
    assert_array_almost_equal(clf.decision_function(X), expected, 2)

    # kernel binary:
    clf = svm.SVC(kernel='rbf', gamma=1, decision_function_shape='ovo')
    clf.fit(X, Y)

    rbfs = rbf_kernel(X, clf.support_vectors_, gamma=clf.gamma)
    dec = np.dot(rbfs, clf.dual_coef_.T) + clf.intercept_
    assert_array_almost_equal(dec.ravel(), clf.decision_function(X))
Example #16
def test_spectral_embedding_callable_affinity(seed=36):
    # Test spectral embedding with callable affinity
    gamma = 0.9
    kern = rbf_kernel(S, gamma=gamma)
    se_callable = SpectralEmbedding(
        n_components=2,
        affinity=(lambda x: rbf_kernel(x, gamma=gamma)),
        gamma=gamma,
        random_state=np.random.RandomState(seed),
    )
    se_rbf = SpectralEmbedding(n_components=2, affinity="rbf", gamma=gamma, random_state=np.random.RandomState(seed))
    embed_rbf = se_rbf.fit_transform(S)
    embed_callable = se_callable.fit_transform(S)
    assert_array_almost_equal(se_callable.affinity_matrix_, se_rbf.affinity_matrix_)
    assert_array_almost_equal(kern, se_rbf.affinity_matrix_)
    assert_true(_check_with_col_sign_flipping(embed_rbf, embed_callable, 0.05))
Example #17
    def _apply_kernel(self, x, y):
        """Apply the selected kernel function to the data."""
        if self.kernel == 'linear':
            phi = linear_kernel(x, y)
        elif self.kernel == 'rbf':
            phi = rbf_kernel(x, y, self.coef1)
        elif self.kernel == 'poly':
            phi = polynomial_kernel(x, y, self.degree, self.coef1, self.coef0)
        elif callable(self.kernel):
            phi = self.kernel(x, y)
            if len(phi.shape) != 2:
                raise ValueError(
                    "Custom kernel function did not return 2D matrix"
                )
            if phi.shape[0] != x.shape[0]:
                raise ValueError(
                    "Custom kernel function did not return matrix with rows"
                    " equal to number of data points."""
                )
        else:
            raise ValueError("Kernel selection is invalid.")

        if self.bias_used:
            phi = np.append(phi, np.ones((phi.shape[0], 1)), axis=1)

        return phi
Example #18
def predict(X,y,gamma):
        
    K = rbf_kernel(X.reshape(-1,1),X.reshape(-1,1),gamma)
    pred = (K * y[:, None]).sum(axis=0) / K.sum(axis=0)
        
    return pred
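
A usage sketch of this Nadaraya-Watson style smoother (synthetic 1-D data; gamma is an illustrative choice, and numpy/rbf_kernel are assumed imported as in the snippet):

X = np.linspace(0, 2 * np.pi, 50)
y = np.sin(X) + 0.1 * np.random.RandomState(0).randn(50)
y_smooth = predict(X, y, gamma=10.0)   # kernel-weighted average of y at each point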
Example #19
        
    def fit(self, X, y, L):
        """Fit the model according to the given training data.

        Prameters
        ---------
        X : array-like, shpae = [n_samples, n_features]
            Training data.

        y : array-like, shpae = [n_samples]
            Target values (unlabeled points are marked as 0).

        L : array-like, shpae = [n_samples, n_samples]
            Graph Laplacian.
        """

        labeled               = y != 0
        y_labeled             = y[labeled]
        n_samples, n_features = X.shape
        n_labeled_samples     = y_labeled.size
        I                     = sp.eye(n_samples)
        J                     = sp.diags(labeled.astype(np.float64))
        K                     = rbf_kernel(X, gamma=self.gamma_k)
        M                     = J @ K \
            + self.gamma_a * n_labeled_samples * I \
            + self.gamma_i * n_labeled_samples / n_samples**2 * L**self.p @ K

        # Train a classifier
        self.dual_coef_       = LA.solve(M, y)

        return self
Example #20
    def _get_affinity_matrix(self, X, Y=None):
        """Calculate the affinity matrix from data
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

            If affinity is "precomputed"
            X : array-like, shape (n_samples, n_samples),
            Interpret X as precomputed adjacency graph computed from
            samples.

        Returns
        -------
        affinity_matrix, shape (n_samples, n_samples)
        """
        if self.affinity == 'precomputed':
            self.affinity_matrix_ = X
            print(type(self.affinity_matrix_))
            return self.affinity_matrix_
            
        # nearest_neigh kept for backward compatibility 
        if self.affinity == 'nearest_neighbors':
            if sparse.issparse(X):
                warnings.warn("Nearest neighbors affinity currently does "
                              "not support sparse input, falling back to "
                              "rbf affinity")
                self.affinity = "rbf"
            else:
                self.n_neighbors_ = (self.n_neighbors
                                     if self.n_neighbors is not None
                                     else max(int(X.shape[0] / 10), 1))
                self.affinity_matrix_ = kneighbors_graph(X, self.n_neighbors_)
                # currently only symmetric affinity_matrix supported
                self.affinity_matrix_ = 0.5 * (self.affinity_matrix_ +
                                               self.affinity_matrix_.T)
                return self.affinity_matrix_
        if self.affinity == 'radius_neighbors':
            if self.neighbors_radius is None:
                self.neighbors_radius_ = np.sqrt(X.shape[1])
                # could use another default value, like diam(X)/sqrt(dimensions)/10
            else:
                self.neighbors_radius_ = self.neighbors_radius
                
            self.gamma_ = (self.gamma
                           if self.gamma is not None else 1.0 / X.shape[1])
            self.affinity_matrix_ = radius_neighbors_graph(X, self.neighbors_radius_, mode='distance')
            
            self.affinity_matrix_.data **= 2
            self.affinity_matrix_.data /= -self.neighbors_radius_**2
            self.affinity_matrix_.data = np.exp(self.affinity_matrix_.data, self.affinity_matrix_.data)
            return self.affinity_matrix_
        if self.affinity == 'rbf':
            self.gamma_ = (self.gamma
                           if self.gamma is not None else 1.0 / X.shape[1])
            self.affinity_matrix_ = rbf_kernel(X, gamma=self.gamma_)
            return self.affinity_matrix_
        self.affinity_matrix_ = self.affinity(X)
        return self.affinity_matrix_
Example #21
 def __kernel_definition__(self):
     if self.Kf == 'rbf':
         return lambda X,Y : rbf_kernel(X,Y,self.rbf_gamma)
     if self.Kf == 'poly':
         return lambda X,Y : polynomial_kernel(X, Y, degree=self.poly_deg, gamma=None, coef0=self.poly_coeff)
     if self.Kf is None or self.Kf == 'linear':
         return lambda X,Y : linear_kernel(X,Y)
Example #22
def calculate_affinities(data, neighborhood, sigma):
    """
    Calculates pairwise affinities for the data.

    Parameters
    ----------
    data : array, shape (N, M)
        Matrix of M-dimensional data points.
    neighborhood : float
        L2 distance threshold for computing affinities, anything outside of this
        threshold is set to 0.
    sigma : float
        Sigma value for computing affinities.

    Returns
    -------
    affinities : array, shape (N, N)
        Pairwise affinities for all data points.
    """
    affinities = pairwise.rbf_kernel(data, data, gamma = (1.0 / (2 * sigma * sigma)))
    distances = pairwise.pairwise_distances(data)

    # affinities and distances are the same dimensionality: (N, N)
    affinities[np.where(distances > neighborhood)] = 0.0
    return affinities
Example #23
def test_spectral_embedding_deterministic():
    # Test that Spectral Embedding is deterministic
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    embedding_1 = spectral_embedding(sims)
    embedding_2 = spectral_embedding(sims)
    assert_array_almost_equal(embedding_1, embedding_2)
Example #24
def test_pairwise_kernels_callable():
    # Test the pairwise_kernels helper function
    # with a callable function, with given keywords.
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((2, 4))

    metric = callable_rbf_kernel
    kwds = {'gamma': 0.1}
    K1 = pairwise_kernels(X, Y=Y, metric=metric, **kwds)
    K2 = rbf_kernel(X, Y=Y, **kwds)
    assert_array_almost_equal(K1, K2)

    # callable function, X=Y
    K1 = pairwise_kernels(X, Y=X, metric=metric, **kwds)
    K2 = rbf_kernel(X, Y=X, **kwds)
    assert_array_almost_equal(K1, K2)
Example #25
def test_laplacian_eigenmap_deterministic():
    # Test that laplacian eigenmap is deterministic
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    embedding_1 = laplacian_eigenmap(sims)
    embedding_2 = laplacian_eigenmap(sims)
    assert_array_almost_equal(embedding_1, embedding_2)
Example #26
def test_spectral_embeding_import():
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)

    assert_warns_message(DeprecationWarning, "spectral_embedding is deprecated",
                         spectral_embedding, sims)
    assert_warns_message(DeprecationWarning, "SpectralEmbedding is deprecated",
                         SpectralEmbedding)
Example #27
 def decision_function(self, X):
     if self.kernel == 'linear':
         f = np.dot(X, self._w) + self._b
     elif self.kernel == 'rbf':
         ## rbf_kernel returns array of shape (n_samples_X, n_samples_Y)
         assert self._ya.shape == (len(self._X), 1)
         f = np.sum(np.multiply(rbf_kernel(self._X, X), self._ya), axis=0) + self._b
     f = np.squeeze(np.array(f))
     return f
Example #28
def test_spectral_embedding_precomputed_affinity(seed=36):
    # Test spectral embedding with precomputed kernel
    gamma = 1.0
    se_precomp = SpectralEmbedding(n_components=2, affinity="precomputed", random_state=np.random.RandomState(seed))
    se_rbf = SpectralEmbedding(n_components=2, affinity="rbf", gamma=gamma, random_state=np.random.RandomState(seed))
    embed_precomp = se_precomp.fit_transform(rbf_kernel(S, gamma=gamma))
    embed_rbf = se_rbf.fit_transform(S)
    assert_array_almost_equal(se_precomp.affinity_matrix_, se_rbf.affinity_matrix_)
    assert_true(_check_with_col_sign_flipping(embed_precomp, embed_rbf, 0.05))
Example #29
 def fit(self, X, y):
     t = time()  # get labels for test data
     # build the graph; the result is the affinity matrix
     if self.kernel == 'dbscan' or self.kernel is None:
         affinity_matrix = self.dbscan(X, self.eps, self.minPts)
     # it is possible to use other kernels -> as parameter
     elif self.kernel == 'rbf':
         affinity_matrix = rbf_kernel(X, X, gamma=self.gamma)
     elif self.kernel == 'knn':
         affinity_matrix = NearestNeighbors(self.naighbors).fit(X).kneighbors_graph(X, self.naighbors).toarray()
     else:
         raise ValueError("unknown kernel: %s" % self.kernel)
     print("graph(%s) time %2.3fms" % (self.kernel, (time() - t) * 1000))
     if affinity_matrix.max() == 0:
         print("no affinity matrix found")
         return y

     degree_matrix   = np.diag(affinity_matrix.sum(axis=0))
     affinity_matrix = np.matrix(affinity_matrix)

     try:
         inverse_degree_matrix = np.linalg.inv(degree_matrix)
     except np.linalg.linalg.LinAlgError as err:
         if 'Singular matrix' in err.args:
             # fall back to the pseudo-inverse when the degree matrix cannot be inverted
             inverse_degree_matrix = np.linalg.pinv(degree_matrix)
         else:
             raise

     matrix = inverse_degree_matrix * affinity_matrix
     # split labels in different vectors to calculate the propagation for the separate label
     labels = np.unique(y)
     labels = [x for x in labels if x != self.unlabeledValue]
     # init the yn1 and y0
     y0  = [[1 if (x == l) else 0 for x in y] for l in labels]
     yn1 = y0
     # function to set the probability to 1 if it was labeled in the source
     toOrgLabels      = np.vectorize(lambda x, y : 1 if y == 1 else x , otypes=[np.int0])
     # function to set the indices of the source-labeled points
     toOrgLabelsIndex = np.vectorize(lambda x, y, z : z if y == 1 else x , otypes=[np.int0])
     lastLabels       = np.argmax(y0, axis=0)
     while True:
         yn1 = yn1 * matrix
         #first matrix to labels
         ynLablesIndex = np.argmax(yn1, axis=0)
         # row-normalize
         yn1 /= yn1.max()
         yn1 = toOrgLabels(yn1, y0)
         for x in y0:
             ynLablesIndex = toOrgLabelsIndex(ynLablesIndex, x, y0.index(x))
         #second original labels to result
         if np.array_equiv(ynLablesIndex, lastLabels):
             break
         lastLabels = ynLablesIndex
     # result is the index of the labels -> cast indices back to the given labels
     toLabels = np.vectorize(lambda x: labels[x])
     return np.array(toLabels(lastLabels))[0]
Example #30
def cls(mkl):

    for data in datasets:
        print "####################"
        print '# ',data
        print "####################" 
        # consider labels with more than 2%
        t = 0.02
        datadir = '../data/'
        km_dir = datadir + data + "/"
        if data == 'Fingerprint':
            kernels = ['PPKr', 'NB','CP2','NI','LB','CPC','RLB','LC','LI','CPK','RLI','CSC']
            km_list = []
            y = np.loadtxt(km_dir+"y.txt",ndmin=2)
            p = np.sum(y==1,0)/float(y.shape[0])        
            y = y[:,p>t]

            for k in kernels:
                km_f = datadir + data + ("/%s.txt" % k)
                km_list.append(center(normalize_km(np.loadtxt(km_f))))

            pred_f = "../ovkr_result/pred/%s_cvpred_%s.npy" % (data, mkl)
            pred = ovkr_mkl(km_list, y, mkl, 5, data,data)
            np.save(pred_f, pred)

        elif data in image_datasets:
            y = np.loadtxt(km_dir+"y.txt",ndmin=2)
            p = np.sum(y==1,0)/float(y.shape[0])        
            y = y[:,p>t]
            linear_km_list = []
            for i in range(1,16):
                name = 'kernel_linear_%d.txt' % i
                km_f = km_dir+name
                km = np.loadtxt(km_f)
                # normalize input kernel !!!!!!!!
                linear_km_list.append(center(normalize_km(km)))
            pred_f = "../ovkr_result/pred/%s_cvpred_%s.npy" % (data, mkl)
            pred = ovkr_mkl(linear_km_list, y, mkl, 5, data,data)
            np.save(pred_f, pred)

        else:
            rbf_km_list = []
            gammas = [2**-13,2**-11,2**-9,2**-7,2**-5,2**-3,2**-1,2**1,2**3]
            X = np.loadtxt(km_dir+"/x.txt")
            scaler = preprocessing.StandardScaler().fit(X)
            X = scaler.transform(X)
            X = preprocessing.normalize(X)
            y = np.loadtxt(km_dir+"y.txt")
            p = np.sum(y==1,0)/float(y.shape[0])        
            y = y[:,p>t]
            for gamma in gammas:
                km = rbf_kernel(X, gamma=gamma)
                # normalize input kernel !!!!!!!!
                rbf_km_list.append(center(km))
            pred_f = "../ovkr_result/pred/%s_cvpred_%s.npy" % (data, mkl)
            pred = ovkr_mkl(rbf_km_list, y, mkl, 5, data,data)
            np.save(pred_f, pred)
Example #31
def test_svr_predict():
    # Test SVR's decision_function
    # Sanity check, test that predict implemented in python
    # returns the same as the one in libsvm

    X = iris.data
    y = iris.target

    # linear kernel
    reg = svm.SVR(kernel='linear', C=0.1).fit(X, y)

    dec = np.dot(X, reg.coef_.T) + reg.intercept_
    assert_array_almost_equal(dec.ravel(), reg.predict(X).ravel())

    # rbf kernel
    reg = svm.SVR(kernel='rbf', gamma=1).fit(X, y)

    rbfs = rbf_kernel(X, reg.support_vectors_, gamma=reg.gamma)
    dec = np.dot(rbfs, reg.dual_coef_.T) + reg.intercept_
    assert_array_almost_equal(dec.ravel(), reg.predict(X).ravel())
Example #32
	def test_average_dist(self) :
		# 3 samples
		a = [1, 	3]
		b = [0, 	4]
		c = [2, 	5]
		samples = np.array([a, b, c])
		dist = al.average_distance(samples)

		avg_a = (rbf_kernel([a], [b])[0][0] + rbf_kernel([a], [c])[0][0])/2
		avg_b = (rbf_kernel([b], [a])[0][0] + rbf_kernel([b], [c])[0][0])/2
		avg_c = (rbf_kernel([c], [a])[0][0] + rbf_kernel([c], [b])[0][0])/2
		
		self.assertAlmostEqual(dist[0],avg_a)
		self.assertAlmostEqual(dist[1],avg_b)
		self.assertAlmostEqual(dist[2],avg_c)
Example #33
    def test_kernel(self):
        # compute kernel with special rbf kernel
        # compute kernel with sklearn kernel
        # compute kernel between sparse_data_ and sparse_data
        # compute_kernel between sparse_data and data
        # compute kernel between sparse_data and random_data
        # compute_kernel between data and random_data

        # sklearn_kernel_first = rbf_kernel(self.data, self.data, self.gamma)
        # sklearn_kernel_verylittle = rbf_kernel(self.data_verylittle, self.data_verylittle)
        for name_pair, pair in self.pairs_data.items():
            data_norm = self.norm_data[name_pair]
            gamma = self.gamma_data[name_pair]
            sklearn_kernel = rbf_kernel(pair, pair, gamma=gamma)

            special_kernel = special_rbf_kernel(pair,
                                                pair,
                                                gamma=gamma,
                                                norm_X=data_norm,
                                                norm_Y=data_norm.T,
                                                exp_outside=False)
            special_kernel_flag = special_rbf_kernel(pair,
                                                     pair,
                                                     gamma=gamma,
                                                     norm_X=data_norm,
                                                     norm_Y=data_norm.T,
                                                     exp_outside=True)
            special_kernel[special_kernel < 1e-12] = 0
            special_kernel_flag[special_kernel_flag < 1e-12] = 0
            sklearn_kernel[sklearn_kernel < 1e-12] = 0

            equality = np.allclose(sklearn_kernel, special_kernel)
            equality_flag = np.allclose(sklearn_kernel, special_kernel_flag)

            delta = np.linalg.norm(special_kernel - sklearn_kernel)
            delta_flag = np.linalg.norm(special_kernel_flag - sklearn_kernel)
            print("Delta flag: {}; delta: {}".format(delta_flag, delta))

            self.assertTrue(delta_flag < delta)
            self.assertTrue(equality, msg=name_pair)
            self.assertTrue(equality_flag, msg=name_pair)
Example #34
def get_kernel_matrix(X1, X2=None, kernel='rbf', gamma=1, degree=3, coef0=1):
    # Obtain the N1 x N2 kernel matrix from N1 x M and N2 x M data matrices
    if kernel == 'rbf':
        K = pairwise.rbf_kernel(X1, X2, gamma=gamma)
    elif kernel == 'poly':
        K = pairwise.polynomial_kernel(X1, X2, degree=degree, gamma=gamma,
                                       coef0=coef0)
    elif kernel == 'linear':
        K = pairwise.linear_kernel(X1, X2)
    elif kernel == 'laplacian':
        K = pairwise.laplacian_kernel(X1, X2, gamma=gamma)
    elif kernel == 'chi2':
        K = pairwise.chi2_kernel(X1, X2, gamma=gamma)
    elif kernel == 'additive_chi2':
        K = pairwise.additive_chi2_kernel(X1, X2)
    elif kernel == 'sigmoid':
        K = pairwise.sigmoid_kernel(X1, X2, gamma=gamma, coef0=coef0)
    else:
        print('[Error] Unknown kernel')
        K = None
    return K
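
A usage sketch (random matrices; the kernel name and gamma are illustrative, and numpy plus sklearn.metrics.pairwise are assumed imported as in the snippet):

X1 = np.random.RandomState(0).rand(5, 3)
X2 = np.random.RandomState(1).rand(4, 3)
K = get_kernel_matrix(X1, X2, kernel='laplacian', gamma=0.5)   # shape (5, 4)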
Example #35
def test_fastfood():
    """test that Fastfood fast approximates kernel on random data"""
    # compute exact kernel
    gamma = 10.
    kernel = rbf_kernel(X, Y, gamma=gamma)

    sigma = np.sqrt(1 / (2 * gamma))

    # approximate kernel mapping
    ff_transform = Fastfood(sigma, n_components=1000, random_state=42)

    pars = ff_transform.fit(X)
    X_trans = pars.transform(X)
    Y_trans = ff_transform.transform(Y)

    # print X_trans, Y_trans
    kernel_approx = np.dot(X_trans, Y_trans.T)

    print('approximation:', kernel_approx[:5, :5])
    print('true kernel:', kernel[:5, :5])
    assert_array_almost_equal(kernel, kernel_approx, decimal=1)
Example #36
def _kernel(data, centers):
    """
    RBF kernel similarity from each point to each cluster center.

    Parameters
    ----------
    data : 2d array (N x Q)
        Data to be analyzed. There are N data points.
    centers : 2d array (C x Q)
        Cluster centers. There are C clusters, with Q features.

    Returns
    -------
    dist : 2d array (C x N)
        RBF kernel similarity (decreasing in the Euclidean distance) from
        each point to each cluster center.

    See Also
    --------
    scipy.spatial.distance.cdist
    """
    return rbf_kernel(data, centers, gamma=0.01).T
Example #37
 def __kernel_definition__(self):
     """Select the kernel function
     
     Returns
     -------
     kernel : a callable relative to selected kernel
     """
     if hasattr(self.kernel, '__call__'):
         return self.kernel
      if self.kernel == 'rbf' or self.kernel is None:
         return lambda X, Y: rbf_kernel(X, Y, self.rbf_gamma)
     if self.kernel == 'poly':
         return lambda X, Y: polynomial_kernel(X,
                                               Y,
                                               degree=self.degree,
                                               gamma=self.rbf_gamma,
                                               coef0=self.coef0)
     if self.kernel == 'linear':
         return lambda X, Y: linear_kernel(X, Y)
     if self.kernel == 'precomputed':
         return lambda X, Y: X
Example #38
def test_spectral_embedding_unnormalized():
    # Test that spectral_embedding is also processing unnormalized laplacian
    # correctly
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    n_components = 8
    embedding_1 = spectral_embedding(sims,
                                     norm_laplacian=False,
                                     n_components=n_components,
                                     drop_first=False)

    # Verify using manual computation with dense eigh
    laplacian, dd = sparse.csgraph.laplacian(sims,
                                             normed=False,
                                             return_diag=True)
    _, diffusion_map = eigh(laplacian)
    embedding_2 = diffusion_map.T[:n_components] * dd
    embedding_2 = _deterministic_vector_sign_flip(embedding_2).T

    assert_array_almost_equal(embedding_1, embedding_2)
Example #39
def similarity_regression(X, y, n_neighbors=None):
    """
    Calculates similarity based on labels, using X (data) and y (labels).

    A kNN graph is built over the label values y, and its edges are
    weighted by an RBF kernel on X, so pairs of observations that are
    "far" apart in the kNN sense get a similarity of 0.
    """
    from sklearn.neighbors import NearestNeighbors
    if n_neighbors is None:
        n_neighbors = max(int(X.shape[0] * 0.05)+1, 2)
    
    # use NearestNeighbors to determine the closest observations
    y_ = np.array(y).reshape(-1,1)
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto').fit(y_)
    return np.multiply(nbrs.kneighbors_graph(y_).toarray(), rbf_kernel(X, gamma=1))
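
A usage sketch (synthetic regression data; n_neighbors is left at its default, and numpy/rbf_kernel are assumed imported as in the snippet):

X = np.random.RandomState(0).rand(40, 3)
y = X.sum(axis=1)
S = similarity_regression(X, y)   # (40, 40); zero outside the label-space kNN graph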
Example #40
def MAO_lambda_Diversity(idx, yp, ssc_method="none", lam=0.6):
    # MAO lambda: trade-off between uncertainty and diversity
    K = rbf_kernel(active.xunlab, gamma=active.gamma)  #provisional kernel
    Sidx = np.zeros(query_points, dtype=type(idx[0]))
    for j in np.arange(query_points):
        # Add the first point, and remove it from pool
        Sidx[j] = idx[0]
        idx = idx[1:]
        # Compute distances (kernel matrix)
        # Distances between selected samples (Sidx) and the rest (idx)
        Kdist = np.abs(K[Sidx[0:j + 1], :][:, idx])
        # Obtain the minimum distance for each column
        Kdist = Kdist.min(axis=0)
        # Trade-off between AL algorithm and Diversity
        if ssc_method == "ssc":
            heuristic = yp[idx, -1] * lam + Kdist * (1 - lam)
        elif ssc_method == "none":
            heuristic = yp[idx] * lam + Kdist * (1 - lam)
        idx = idx[heuristic.argsort()]  # axis=0
    # Move selected samples from unlabeled set to labeled set
    return active.updateLabels(Sidx)
Example #41
def kernel(x, k=0):
    n = len(x)
    # create matrices A, D and L
    A = rbf_kernel(x, gamma=0.55)
    A[np.arange(n), np.arange(n)] = 0
    D = np.diag(A.sum(axis=0)**(-0.5))
    L = D @ A @ D

    # find eigenpairs and take the k biggest ones
    l, v = eig(L)
    i = np.flip(l.argsort())
    k = k or find_gap(l[i])
    i = i[:k]

    # create new, normalised representation of data points
    x_ = v[:, i]
    n = norm(x_, axis=1, keepdims=True)
    x_ = x_ / n

    k = linear_kernel(x_)
    return k
Example #42
 def transform(self, X):
     nt = X.shape[0]
     if self._kernel_type == 'rbf':
         K = rbf_kernel(X, self._X, gamma=self._gamma)
     elif self._kernel_type == 'poly':
         K = polynomial_kernel(X,
                               self._X,
                               degree=self._degree,
                               coef0=self._coef0)
     elif self._kernel_type == 'linear':
         K = linear_kernel(X, self._X)
      if self._centred:
          # centre the test kernel against the training kernel
         K1 = (K - 1. / self._n * np.ones((nt, self._n)).dot(self._K))
         K2 = np.eye(self._n) - 1. / self._n * np.ones((self._n, self._n))
         Ko = K1.dot(K2)
     else:
         Ko = K
     return Ko
Example #43
    def _rbf_kernel(self, data_validation, data_training):
        """Radial basis function

        Parameters
        ----------
        data_validation : ndarray
            Validation data
        data_training : ndarray
            Training data

        Returns
        -------
        kernel : ndarray
            Kernel similarity matrix
        """
        if isinstance(self.gamma, str):
            gamma = 1 / data_training.shape[1]
        else:
            gamma = self.gamma

        return rbf_kernel(data_validation, data_training, gamma)
Example #44
def get_RBF(A, s=1.):
    """ Compute radial basis function kernel.
    
    Parameters:
        A -- Feature matrix.
        s -- Scale parameter (positive float, 1.0 by default).
        
    Return:
        K -- Radial basis function kernel matrix.
    """

    from sklearn.metrics.pairwise import euclidean_distances, rbf_kernel
    from sklearn.preprocessing import scale

    A = scale(A)
    dist_matrix = euclidean_distances(A, A, None, squared=True)
    dist_vector = dist_matrix[np.nonzero(np.tril(dist_matrix))]
    dist_median = np.median(dist_vector)
    K = rbf_kernel(A, None, dist_median * s)

    return K
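
A usage sketch (a random feature matrix; the scale parameter is left at its default, and numpy is assumed imported as np):

A = np.random.RandomState(0).rand(30, 4)
K = get_RBF(A)   # (30, 30) RBF kernel with a median-distance bandwidth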
Example #45
    def extract_batch(self, inp, idx_i, idx_j, batch_X):
        if self.cached_rawbatch is None or self.cached_rawbatch.shape[
                0] < batch_X.shape[0]:
            self.cached_rawbatch = np.zeros((batch_X.shape[0], len(self.fext)),
                                            self.fext.dtype)
            rawbatch = self.cached_rawbatch
        else:
            rawbatch = self.cached_rawbatch[:idx_i.shape[0]]
        self.fext.extract_batch(inp, idx_i, idx_j, rawbatch)

        Xscaled = rawbatch.astype(np.float64)
        Xscaled -= self.mean
        Xscaled /= self.std
        if self.kernel == 'poly':
            K_y = fast_poly_kernel(Xscaled, self.basis,
                                   degree=self.degree).astype(self.dtype)
        else:
            K_y = rbf_kernel(Xscaled, self.basis,
                             gamma=self.gamma).astype(self.dtype)

        batch_X[:] = self.phi_map.dot(K_y.T).T
Example #46
def Gram_Matrix(Kernel, X_set, Degree, Gamma):
    print("Computing Gram Matrix...")
    Gram = np.zeros(shape=(X_set.shape[0], X_set.shape[0]))
    for i in range(0, (X_set.shape[0])):
        for j in range(0, (X_set.shape[0])):
            # the pairwise kernel functions expect 2D inputs, so reshape the rows
            xi = X_set[i].reshape(1, -1)
            xj = X_set[j].reshape(1, -1)
            if Kernel == 'poly':
                Gram[i, j] = polynomial_kernel(xi, xj, Degree)
            elif Kernel == 'rbf':
                Gram[i, j] = rbf_kernel(xi, xj, Gamma)
            elif Kernel == 'linear':
                Gram[i, j] = polynomial_kernel(xi, xj, Degree, coef0=0)
    # use the following instruction to fix the Gram matrix symmetry problem
    Gram = np.maximum(Gram, Gram.transpose())
    # use the following instruction to fix CPLEX Error 5002 (objective is not convex)
    if Kernel == 'poly' or Kernel == 'rbf':
        Gram = Gram + np.identity(Gram.shape[1])
    print("Done")
    return Gram
Example #47
	def choose_article(self, selected_user, article_pool, time):
		rbf_row=rbf_kernel(self.user_features[selected_user].reshape(1,-1), self.user_features)
		neighbors=np.argsort(rbf_row)[0][self.user_num-self.k:]
		#neighbors=self.neighbors[selected_user]
		neighbors=list(set(neighbors)&set(self.served_users))
		if (len(neighbors)==0):
			self.user_cluster_features[selected_user]=self.user_features[selected_user]
		else:
			if (len(neighbors)==1):
				weights=[1]
			else:
				weights=rbf_row[0][neighbors]/np.sum(rbf_row[0][neighbors])
			self.user_cluster_features[selected_user]=np.average(self.user_features[neighbors], weights=weights, axis=0)

		mean=np.dot(self.artificial_article_features[article_pool], self.user_cluster_features[selected_user])
		temp1=np.dot(self.artificial_article_features[article_pool], np.linalg.inv(self.cluster_cor_matrix[selected_user]))
		temp2=np.sum(temp1*self.artificial_article_features[article_pool], axis=1)*np.log(time+1)
		var=np.sqrt(temp2)
		pta=mean+self.alpha*var
		article_picked=np.argmax(pta)
		article_picked=article_pool[article_picked]
		return article_picked, neighbors
Example #48
    def load_data(self, X, y, gamma=None, docalkernel=False, savefile=None, testfile=None, dobin=False):
      self.X = X
      if dobin:
          bins = [-1.0, -0.67, -0.33, 0, 0.33, 0.67, 1.0]
          # bins = [-1.0, 0, 1.0]
          binned  = np.digitize(self.X, bins )
          self.X=np.array([bins[binned[i, j] - 1] for i in range(np.shape(self.X)[0]) for j in range(np.shape(self.X)[1])]).reshape(np.shape(self.X))

      self.y = y
      if testfile is not None:
          dat2 = load_svmlight_file(testfile)
          self.testX = dat2[0].todense()
          if dobin:
              bins = [-1.0, -0.67, -0.33, 0, 0.33, 0.67, 1.0]
              binned = np.digitize(self.testX, bins)
              self.testX = np.array([bins[binned[i, j] - 1] for i in range(np.shape(self.testX)[0]) for j in range(np.shape(self.testX)[1])]).reshape(np.shape(self.testX))

          self.testy = dat2[1]
      # print np.shape(self.X)

      self.gamma = gamma
      self.kernel = rbf_kernel(self.X, gamma=gamma)
Example #49
def lalign_kernel(X, y, kernel="rbf", alpha=2, sigma=5):
    # X: np array of shape (n_samples, n_features)
    # returns E_{(x,x')~p}[ kernel(y, y') * ||f(x) - f(x')||_2**alpha ]
    from sklearn.metrics import pairwise_distances
    from sklearn.metrics.pairwise import rbf_kernel
    if kernel == "rbf":
        kernel = lambda y1, y2: rbf_kernel(y1, y2, gamma=1. / (2 * sigma**2))
    else:
        assert hasattr(kernel, '__call__'), 'kernel must be a callable'
    if len(X.shape) == 3:
        # Merges the first 2 dimensions
        y = y.reshape(y.shape[0] * y.shape[1], -1)
        X = X.reshape(X.shape[0] * X.shape[1], -1)
        assert len(y) == len(X)
    weights = kernel(y, y)  # (n_samples, n_samples)
    weights = (1 - np.eye(len(weights))) * weights
    weights /= weights.sum(axis=1)
    dist_matrix = pairwise_distances(
        X / np.linalg.norm(X, 2, axis=1, keepdims=True),
        metric='euclidean')**alpha
    dist = (dist_matrix * weights).sum() / weights.sum()
    return dist
Example #50
    def prepare(self, X, y, gamma=0.7, δ_p=1e-6, δ_n=1e-6):
        '''
        compute extra information for the gaussian kernel method

        Parameters:
        X(np.array): 2D matrix containing the whole dataset, rows are samples, columns are variables
        y(np.array): 1D array containing the labels of the whole dataset
        gamma(float): a constant used in the rbf kernel, check sklearn.metrics.pairwise.rbf_kernel
        δ_p/δ_n (float): regularization terms that keep the kernel covariance matrix F symmetric positive definite (in S++)
        '''

        X_p, y_p, w_p, X_n, y_n, w_n = super().prepare(X, y)

        self.gamma = gamma
        self.m_p = len(y_p)
        self.m_n = len(y_n)
        self.J_p = np.zeros((self.m_p + self.m_n, self.m_p + self.m_n))
        self.J_p[0:self.m_p, 0:self.m_p] = 1 / np.sqrt(
            self.m_p) * (np.identity(self.m_p) - 1 / self.m_p * np.ones(
                (self.m_p, self.m_p)))
        self.J_n = np.zeros((self.m_p + self.m_n, self.m_p + self.m_n))
        self.J_n[self.m_p:, self.m_p:] = 1 / np.sqrt(
            self.m_n) * (np.identity(self.m_n) - 1 / self.m_n * np.ones(
                (self.m_n, self.m_n)))
        self.g_p = np.zeros(self.m_p + self.m_n)
        self.g_p[0:self.m_p] = 1 / self.m_p * np.ones(self.m_p)
        self.g_n = np.zeros(self.m_p + self.m_n)
        self.g_n[self.m_p:] = 1 / self.m_n * np.ones(self.m_n)
        self.X_combine = np.append(X_p, X_n, axis=0)
        self.G = rbf_kernel(X=self.X_combine,
                            Y=self.X_combine,
                            gamma=self.gamma)
        self.F_p = self.G @ self.J_p @ self.J_p.T @ self.G + δ_p * self.G
        self.F_n = self.G @ self.J_n @ self.J_n.T @ self.G + δ_n * self.G
        self.F_p_sqrt = np.real(sqrtm(self.F_p))
        self.F_n_sqrt = np.real(sqrtm(self.F_n))
        # set up variables for cvxpy
        self.α_kernel = cp.Variable(shape=(len(self.G), 1))
Example #51
def _build_kernel(x, kernel, gamma=None):

    if kernel in {'pearson', 'spearman'}:
        if kernel == 'spearman':
            x = np.apply_along_axis(rankdata, 1, x)
        return np.corrcoef(x)

    if kernel in {'cosine', 'normalized_angle'}:
        x = 1 - squareform(pdist(x, metric='cosine'))
        if kernel == 'normalized_angle':
            x = 1 - np.arccos(x, x) / np.pi
        return x

    if kernel == 'gaussian':
        if gamma is None:
            gamma = 1 / x.shape[1]
        return rbf_kernel(x, gamma=gamma)

    if callable(kernel):
        return kernel(x)

    raise ValueError("Unknown kernel '{0}'.".format(kernel))
Example #52
def bourgain_embedding_matrix(distance_matrix):
    distance_matrix = np.array(distance_matrix)
    n = len(distance_matrix)
    if n == 1:
        return distance_matrix
    np.random.seed(123)
    distort_elements = []
    r = range(n)
    k = int(math.ceil(math.log(n) / math.log(2) - 1))
    t = int(math.ceil(math.log(n)))
    counter = 0
    for i in range(0, k + 1):
        # use distinct loop variables: the original shadowed t and s, which
        # shrank the inner range on every outer iteration
        for t_idx in range(t):
            s = np.random.choice(r, 2**i)
            for j in r:
                d = min([distance_matrix[j][si] for si in s])
                counter += len(s)
                if i == 0 and t_idx == 0:
                    distort_elements.append([d])
                else:
                    distort_elements[j].append(d)
    return rbf_kernel(distort_elements, distort_elements)
Example #53
 def estimate_density(self, X):
     model = self.model
     if self.algo == 'kde':
         # model : kde scikit-learn
         self.density = np.exp(model.score_samples(X))
     elif self.algo == 'mom-kde':
         # model : list of kdes scikit-learn
         z = []
         for k in range(len(model)):
             kde_k = model[k]
             z.append(np.exp(kde_k.score_samples(X)))
         self.density = np.median(z, axis=0)
     elif self.algo == 'rkde':
         # model : weights vector w
         n_samples, d = self.X_data.shape
         m = X.shape[0]
         K_plot = np.zeros((m, n_samples))
         for i_d in range(d):
             temp_xpos = X[:, i_d].reshape((-1, 1))
             temp_x = self.X_data[:, i_d].reshape((-1, 1))
             K_plot = K_plot + (np.dot(np.ones((m, 1)), temp_x.T) -
                                np.dot(temp_xpos, np.ones(
                                    (1, n_samples))))**2
         K_plot = kde_lib.gaussian_kernel(K_plot, self.bandwidth, d)
         z = np.dot(K_plot, model)
         self.density = z
     elif self.algo == 'spkde':
         # model : weights vector a
         d = self.X_data.shape[1]
         gamma = 1. / (2 * (self.bandwidth**2))
         GG = rbf_kernel(self.X_data, X, gamma=gamma) * (
             2 * np.pi * self.bandwidth**2)**(-d / 2.)
         z = np.zeros((X.shape[0]))
         for j in range(X.shape[0]):
             for i in range(len(model)):
                 z[j] += model[i] * GG[i, j]
         self.density = z
     else:
         print('no algo specified')
Example #54
def gradForm2(Data, W, w, gamma, K1=None):
    # gradient, with respect to w, of L = (1/N^2) * sum (k(x1, x2) - cos(W x1)·cos(W x2))^2
    import numpy as np
    from sklearn.metrics import pairwise

    Nexp = np.shape(W)[1]
    Ndata, Nfeat = np.shape(Data)

    if K1 is None:
        K1 = pairwise.rbf_kernel(Data, gamma=gamma)

    C = np.zeros((Nfeat, Ndata))
    B = np.zeros((Nfeat, Ndata))
    for i in range(Ndata):
        X = np.tile(Data[i, ], (Ndata, 1))
        K = K1[i, :]
        K = K[:, np.newaxis]
        #
        c1 = np.outer(
            np.cos(np.dot(Data, w)) * K, np.sin(np.dot(Data[i, ], w)))
        c2 = np.outer(
            np.sin(np.dot(Data, w)) * K, np.cos(np.dot(Data[i, ], w)))
        C[:, i] = np.squeeze((np.dot((X.T), c1) + np.dot((Data.T), c2)))
        #

        AK = np.dot(np.cos(np.dot(Data[i, ], W)), np.cos(np.dot(Data, W)).T)
        AK = AK[:, np.newaxis]
        #
        b1 = np.outer(
            np.cos(np.dot(Data, w)) * AK, np.sin(np.dot(Data[i, ], w)))
        b2 = np.outer(
            np.sin(np.dot(Data, w)) * AK, np.cos(np.dot(Data[i, ], w)))
        B[:, i] = np.squeeze((np.dot((X.T), b1) + np.dot((Data.T), b2)))
        #

    L = (1.0 / Ndata**2) * (np.sum(C, axis=1) -
                            (2.0 / Nexp) * np.sum(B, axis=1))
    L = L[:, np.newaxis]
    return L
Example #55
def Lossfunction(Data, W, gamma, K=None, w=None):
    import numpy as np
    from sklearn.metrics import pairwise

    # append the extra direction w to W when provided
    if w is not None:
        W = np.concatenate((W, w), axis=1)

    Nexp = np.shape(W)[1]
    Ndata, Nfeat = np.shape(Data)
    # if kernel is not provided
    if K is None:
        K = pairwise.rbf_kernel(Data, gamma=gamma)

    Phi = np.cos(np.dot(Data, W))
    AK = (2.0 / Nexp) * np.dot(Phi, Phi.T)

    L = np.sum((K - AK)**2) / Ndata**2
    #    L= np.linalg.norm(K-AK)/np.linalg.norm(K)

    return L
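
A usage sketch of the loss (random data and random feature directions; gamma is illustrative, and numpy is assumed imported as np):

rng = np.random.RandomState(0)
Data = rng.randn(50, 3)
W = rng.randn(3, 100)   # 100 random-feature directions
print(Lossfunction(Data, W, gamma=0.5))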
Example #56
def kernel_matrix(X, sigma, kernel, pkDegree, c0):

    print("Calculating Kernel matrix")

    # The value of sigma is very important, and an objective of research; a default value is used here.
    # Get dimension N of the square distance matrix
    N = X.shape[0]
    # Initialise the kernel matrix with zeros
    K = np.zeros((N, N))

    if kernel == 'gaussian':
        gamma = 0.5 / sigma**2
        K = rbf_kernel(X, gamma=gamma)
    elif kernel == 'laplacian':
        gamma = 1 / sigma
        K = laplacian_kernel(X, gamma=gamma)
    elif kernel == 'linear':
        K = linear_kernel(X)
    elif kernel == 'polynomial':
        K = polynomial_kernel(X, gamma=sigma, degree=pkDegree, coef0=c0)

    return K
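
A usage sketch (random data; sigma and the kernel choice are illustrative, and numpy plus the sklearn kernel functions are assumed imported as in the snippet):

X = np.random.RandomState(0).rand(20, 3)
K = kernel_matrix(X, sigma=1.0, kernel='gaussian', pkDegree=3, c0=1.0)   # shape (20, 20)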
Example #57
def GP(seq_length=30, num_samples=28*5*100, num_signals=1, scale=0.1, kernel='rbf', **kwargs):
    # the shape of the samples is num_samples x seq_length x num_signals
    samples = np.empty(shape=(num_samples, seq_length, num_signals))
    #T = np.arange(seq_length)/seq_length    # note, between 0 and 1
    T = np.arange(seq_length)    # note, not between 0 and 1
    if kernel == 'periodic':
        cov = periodic_kernel(T)
    elif kernel =='rbf':
        cov = rbf_kernel(T.reshape(-1, 1), gamma=scale)
    else:
        raise NotImplementedError
    # scale the covariance
    cov *= 0.2
    # define the distribution
    mu = np.zeros(seq_length)
    print(np.linalg.det(cov))
    distribution = multivariate_normal(mean=np.zeros(cov.shape[0]), cov=cov)
    pdf = distribution.logpdf
    # now generate samples
    for i in range(num_signals):
        samples[:, :, i] = distribution.rvs(size=num_samples)
    return samples, pdf
Example #58
def updating_weight(j, weights, X, u, v, m, variances, nominator):
    """
    j: weight vector dimension to update
    weights: weights vector
    X: data sample
    u: fuzzy membership degree
    v: centroid
    m: constant
    variances: per-dimension variances
    nominator: numerator of the weight update
    """
    denominator = 0.0
    quantile = np.divide(1, variances[j])
    for i in range(v.shape[0]):
        k_sum = 0
        for k in range(X.shape[0]):
            kernel = rbf_kernel(X[k, j].reshape(-1, 1),
                                v[i, j].reshape(-1, 1),
                                gamma=quantile)
            k_sum += np.multiply(np.power(u[i, k], m),
                                 np.multiply(2, 1 - kernel))
        denominator += k_sum

    return nominator / denominator
Example #59
def updating_centroid(i, j, u, X, m, v, variances):
    """
    i: cluster position
    j: feature (dimension) position
    u: fuzzy membership degree
    X: data sample
    m: constant
    v: centroid
    variances: per-dimension variances
    """
    nominators = []
    denominators = []
    quantile = np.divide(1, variances[j])
    for k in range(X.shape[0]):
        kernel = rbf_kernel(X[k, j].reshape(-1, 1),
                            v[i, j].reshape(-1, 1),
                            gamma=quantile)
        nominators.append(
            np.multiply(np.multiply(np.power(u[i, k], m), kernel), X[k, j]))
        denominators.append(np.multiply(np.power(u[i, k], m), kernel))
    nominators = np.array(nominators)
    denominators = np.array(denominators)
    return nominators.sum() / denominators.sum()
Example #60
def UPDATE_PROBABILITY_PARTIAL(R, X_train, s):
    R = np.array(R)
    temp = R

    C_pie = rbf_kernel(X_train, X_train[R, :])
    k_pie = int(s / 2)
    # note: s is rebound here from the size argument to the singular values
    ui_m, s, vt = linalg.svd(C_pie, full_matrices=False)

    new_K = (ui_m[:, 0:k_pie]).dot(np.diag(1 / np.sqrt(s[0:k_pie]))).dot(
        vt[0:k_pie, :])

    C_nys = new_K
    E = C_pie - C_nys
    temp_p = np.zeros((X_train.shape[0], 1)).reshape(-1, 1)
    for j in range(X_train.shape[0]):
        if j in temp:
            temp_p[j] = 0
        else:
            temp_p[j] = np.linalg.norm(E[j], ord=2)
    P = np.square(temp_p / np.sqrt(np.sum(np.square(temp_p))))

    return P