def kmm(Xtrain, Xtest, sigma):
    n_tr = len(Xtrain)
    n_te = len(Xtest)

    # calculate kernel
    print('Computing kernel for training data ...')
    # note: the third positional argument of sklearn's rbf_kernel is gamma
    K_ns = sk.rbf_kernel(Xtrain, Xtrain, sigma)
    # make it symmetric (average with the transpose; 0.5, not 0.9, gives the symmetric part)
    K = 0.5 * (K_ns + K_ns.transpose())

    # calculate kappa
    print('Computing kernel for kappa ...')
    kappa_r = sk.rbf_kernel(Xtrain, Xtest, sigma)
    ones = numpy.ones(shape=(n_te, 1))
    kappa = numpy.dot(kappa_r, ones)
    kappa = -(float(n_tr) / float(n_te)) * kappa

    # calculate eps
    eps = (math.sqrt(n_tr) - 1) / math.sqrt(n_tr)

    # constraints
    A0 = numpy.ones(shape=(1, n_tr))
    A1 = -numpy.ones(shape=(1, n_tr))
    A = numpy.vstack([A0, A1, -numpy.eye(n_tr), numpy.eye(n_tr)])
    b = numpy.array([[n_tr * (eps + 1), n_tr * (eps - 1)]])
    b = numpy.vstack([b.T, numpy.zeros(shape=(n_tr, 1)),
                      numpy.ones(shape=(n_tr, 1)) * 1000])

    print('Solving quadratic program for beta ...')
    P = matrix(K, tc='d')
    q = matrix(kappa, tc='d')
    G = matrix(A, tc='d')
    h = matrix(b, tc='d')
    beta = solvers.qp(P, q, G, h)
    return [i for i in beta['x']]
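# A minimal usage sketch for kmm above, on synthetic covariate-shift data.
# kmm expects these module-level names: numpy, math, sk (sklearn.metrics.pairwise),
# and cvxopt's matrix/solvers; the data and sigma below are illustrative only.
import math
import numpy
import sklearn.metrics.pairwise as sk
from cvxopt import matrix, solvers

Xtrain = numpy.random.randn(100, 5)
Xtest = numpy.random.randn(50, 5) + 0.5  # shifted test distribution
beta = kmm(Xtrain, Xtest, 0.5)           # one importance weight per training row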
def _get_kernel(self, X, y=None):
    if self.kernel == "rbf":
        if y is None:
            return rbf_kernel(X, X, gamma=self.gamma)
        else:
            return rbf_kernel(X, y, gamma=self.gamma)
    elif self.kernel == "knn":
        if self.nn_fit is None:
            self.nn_fit = NearestNeighbors(n_neighbors=self.n_neighbors).fit(X)
        if y is None:
            # Nearest neighbors returns a directed matrix.
            dir_graph = self.nn_fit.kneighbors_graph(self.nn_fit._fit_X,
                                                     self.n_neighbors,
                                                     mode='connectivity')
            # Making the matrix symmetric
            un_graph = dir_graph + dir_graph.T
            # Since it is a connectivity matrix, all values should be
            # either 0 or 1
            un_graph[un_graph > 1.0] = 1.0
            return un_graph
        else:
            return self.nn_fit.kneighbors(y, return_distance=False)
    else:
        raise ValueError("%s is not a valid kernel. Only rbf and knn"
                         " are supported at this time" % self.kernel)
def _Gram(self, X):
    if X is self.X:
        if self.Gs_train is None:
            kernel_scalar = rbf_kernel(self.X, gamma=self.gamma)[:, :, newaxis, newaxis]
            delta = subtract(X.T[:, newaxis, :], self.X.T[:, :, newaxis])
            self.Gs_train = asarray(transpose(
                2 * self.gamma * kernel_scalar *
                (2 * self.gamma *
                 (delta[:, newaxis, :, :] * delta[newaxis, :, :, :]).transpose((3, 2, 0, 1)) +
                 ((self.p - 1) -
                  2 * self.gamma * _norm_axis_0(delta)[:, :, newaxis, newaxis]**2) *
                 eye(self.p)[newaxis, newaxis, :, :]),
                (0, 2, 1, 3)
            )).reshape((self.p * X.shape[0], self.p * self.X.shape[0]))
        return self.Gs_train
    kernel_scalar = rbf_kernel(X, self.X, gamma=self.gamma)[:, :, newaxis, newaxis]
    delta = subtract(X.T[:, newaxis, :], self.X.T[:, :, newaxis])
    return asarray(transpose(
        2 * self.gamma * kernel_scalar *
        (2 * self.gamma *
         (delta[:, newaxis, :, :] * delta[newaxis, :, :, :]).transpose((3, 2, 0, 1)) +
         ((self.p - 1) -
          2 * self.gamma * _norm_axis_0(delta).T[:, :, newaxis, newaxis]**2) *
         eye(self.p)[newaxis, newaxis, :, :]),
        (0, 2, 1, 3)
    )).reshape((self.p * X.shape[0], self.p * self.X.shape[0]))
def hsic(x, y, sigma):
    """Compute HSIC between two random variables

    Parameters
    ----------
    x : array, shape (n_instances, 1)
        vector containing m observations of the first random variable
    y : array, shape (n_instances, 1)
        vector containing m observations of the second random variable
    sigma : float
        scale parameter for the Gaussian kernel

    Returns
    -------
    hsic_value : float
        HSIC value of the two input random variables
    """
    # m is the number of observations here
    m = len(x)
    gamma = 1.0 / (2 * sigma**2)
    k = rbf_kernel(x, x, gamma)
    l = rbf_kernel(y, y, gamma)
    # zero the diagonals
    for i in range(m):
        k[i, i] = 0
        l[i, i] = 0
    # centering matrix H = I - (1/m) * ones
    h = np.eye(m) - 1.0 / m
    hsic_value = (1.0 / (m - 1)**2) * np.trace(np.dot(np.dot(np.dot(k, h), l), h))
    return hsic_value
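# A minimal usage sketch for hsic above (synthetic data; the sizes and sigma
# are illustrative only; hsic expects module-level np and rbf_kernel).
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
x = rng.randn(200, 1)
y = 2 * x + 0.1 * rng.randn(200, 1)  # strongly dependent -> large HSIC
z = rng.randn(200, 1)                # independent -> HSIC close to 0
print(hsic(x, y, sigma=1.0), hsic(x, z, sigma=1.0))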
def fit(self, X):
    A = tools.kmeans_centroids(X, self.n_prototypes).cluster_centers_
    self.W = rbf_kernel(A, A, gamma=1. / self.sigma2)
    self.H = rbf_kernel(X, A, gamma=1. / self.sigma2)
    self.W_dagger = np.linalg.pinv(self.W)
    d_tilde = self.H.dot(self.W_dagger.dot(self.H.T.sum(axis=1)))
    self.HtH = self.H.T.dot(self.H)
    self.HtSH = (self.H.T * d_tilde).dot(self.H) - self.HtH.dot(self.W_dagger).dot(self.HtH.T)
    self.n = X.shape[0]
def test_pairwise_kernels():
    """Test the pairwise_kernels helper function."""

    def callable_rbf_kernel(x, y, **kwds):
        """Callable version of pairwise.rbf_kernel."""
        K = rbf_kernel(np.atleast_2d(x), np.atleast_2d(y), **kwds)
        return K

    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((2, 4))
    # Test with all metrics that should be in PAIRWISE_KERNEL_FUNCTIONS.
    test_metrics = ["rbf", "sigmoid", "polynomial", "linear", "chi2",
                    "additive_chi2"]
    for metric in test_metrics:
        function = PAIRWISE_KERNEL_FUNCTIONS[metric]
        # Test with Y=None
        K1 = pairwise_kernels(X, metric=metric)
        K2 = function(X)
        assert_array_almost_equal(K1, K2)
        # Test with Y=Y
        K1 = pairwise_kernels(X, Y=Y, metric=metric)
        K2 = function(X, Y=Y)
        assert_array_almost_equal(K1, K2)
        # Test with tuples as X and Y
        X_tuples = tuple([tuple([v for v in row]) for row in X])
        Y_tuples = tuple([tuple([v for v in row]) for row in Y])
        K2 = pairwise_kernels(X_tuples, Y_tuples, metric=metric)
        assert_array_almost_equal(K1, K2)
        # Test with sparse X and Y
        X_sparse = csr_matrix(X)
        Y_sparse = csr_matrix(Y)
        if metric in ["chi2", "additive_chi2"]:
            # these don't support sparse matrices yet
            assert_raises(ValueError, pairwise_kernels,
                          X_sparse, Y=Y_sparse, metric=metric)
            continue
        K1 = pairwise_kernels(X_sparse, Y=Y_sparse, metric=metric)
        assert_array_almost_equal(K1, K2)
    # Test with a callable function, with given keywords.
    metric = callable_rbf_kernel
    kwds = {}
    kwds['gamma'] = 0.1
    K1 = pairwise_kernels(X, Y=Y, metric=metric, **kwds)
    K2 = rbf_kernel(X, Y=Y, **kwds)
    assert_array_almost_equal(K1, K2)
    # callable function, X=Y
    K1 = pairwise_kernels(X, Y=X, metric=metric, **kwds)
    K2 = rbf_kernel(X, Y=X, **kwds)
    assert_array_almost_equal(K1, K2)
def compute_err(parameters):
    X, k_train, USV_k_train, n_SR, method, gamma, seed, k = parameters
    np.random.seed(seed)
    idx_SR = subsampling(X, int(n_SR), method)
    X_SR = X[idx_SR]
    k_SR = rbf_kernel(X_SR, X_SR, gamma=gamma)
    U_SR, S_SR, V_SR = np.linalg.svd(k_SR)
    USV_k_SR = [U_SR, S_SR, V_SR]
    k_train_SR = rbf_kernel(X, X_SR, gamma=gamma)
    rel_err, USV_k_SR = rel_approx_error(k_train, USV_k_SR, idx_SR)
    rel_acc = rel_approx_acc(k_train, USV_k_train, k_SR, USV_k_SR, idx_SR, k)
    quan_err = quan_error(X, X_SR)
    return [rel_err, quan_err, rel_acc]
def fit(self, X, y, unlabeled_data=None):
    num_data = X.shape[0] + unlabeled_data.shape[0]
    num_labeled = X.shape[0]
    num_unlabeled = unlabeled_data.shape[0]
    labeled = np.zeros((num_data,), dtype=np.float32)
    labeled[0:num_labeled] = 1.0
    if issparse(X):
        self.X_ = vstack((util.cast_to_float32(X),
                          util.cast_to_float32(unlabeled_data)), format='csr')
    else:
        self.X_ = np.concatenate((util.cast_to_float32(X),
                                  util.cast_to_float32(unlabeled_data)))
    self.gamma = (self.gamma if self.gamma is not None else 1.0 / X.shape[1])
    self.kernel_params = {'gamma': self.gamma, 'degree': self.degree,
                          'coef0': self.coef0}
    kernel_matrix = pairwise_kernels(self.X_, metric=self.kernel,
                                     filter_params=True, **self.kernel_params)
    A = np.dot(np.diag(labeled), kernel_matrix)
    if self.nu2 != 0:
        if self.kernel == 'rbf':
            laplacian_kernel_matrix = kernel_matrix
        else:
            laplacian_kernel_matrix = rbf_kernel(self.X_, gamma=self.gamma)
        laplacian_x_kernel = np.dot(graph_laplacian(
            laplacian_kernel_matrix, normed=self.normalize_laplacian),
            kernel_matrix)
        A += self.nu2 * laplacian_x_kernel
    y = np.concatenate((y, -np.ones((num_unlabeled,), dtype=np.float32)),
                       axis=0)
    super(LapRLSC, self).fit(A, y, class_for_unlabeled=-1)
def __init__(self, *args, **kwargs):
    super(QUIRE, self).__init__(*args, **kwargs)
    self.Uindex = [idx for idx, _ in self.dataset.get_unlabeled_entries()]
    self.Lindex = [idx for idx in range(len(self.dataset))
                   if idx not in self.Uindex]
    self.lmbda = kwargs.pop("lambda", 1.0)
    X, self.y = zip(*self.dataset.get_entries())
    self.y = list(self.y)
    self.kernel = kwargs.pop("kernel", "rbf")
    if self.kernel == "rbf":
        self.K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop("gamma", 1.0))
    elif self.kernel == "poly":
        self.K = polynomial_kernel(X=X,
                                   Y=X,
                                   coef0=kwargs.pop("coef0", 1),
                                   degree=kwargs.pop("degree", 3),
                                   gamma=kwargs.pop("gamma", 1.0))
    elif self.kernel == "linear":
        self.K = linear_kernel(X=X, Y=X)
    elif hasattr(self.kernel, "__call__"):
        self.K = self.kernel(X=np.array(X), Y=np.array(X))
    else:
        raise NotImplementedError
    if not isinstance(self.K, np.ndarray):
        raise TypeError("K should be an ndarray")
    if self.K.shape != (len(X), len(X)):
        raise ValueError("kernel should have size (%d, %d)" % (len(X), len(X)))
    self.L = np.linalg.inv(self.K + self.lmbda * np.eye(len(X)))
def bourgain_embedding_matrix(distance_matrix):
    """Use Bourgain algorithm to embed the neural architectures based on their edit-distance.

    Args:
        distance_matrix: A matrix of edit-distances.

    Returns:
        A matrix of distances after embedding.
    """
    distance_matrix = np.array(distance_matrix)
    n = len(distance_matrix)
    if n == 1:
        return distance_matrix
    np.random.seed(123)
    distort_elements = []
    r = range(n)
    k = int(math.ceil(math.log(n) / math.log(2) - 1))
    t = int(math.ceil(math.log(n)))
    counter = 0
    for i in range(0, k + 1):
        # note: the original inner loop was `for t in range(t)`, which shadowed
        # t and shrank the loop on every pass over i; a separate index fixes that
        for t_idx in range(t):
            s = np.random.choice(r, 2 ** i)
            for j in r:
                d = min([distance_matrix[j][idx] for idx in s])
                counter += len(s)
                if i == 0 and t_idx == 0:
                    distort_elements.append([d])
                else:
                    distort_elements[j].append(d)
    return rbf_kernel(distort_elements, distort_elements)
def _apply_kernel(self, X, y=None):
    """Apply the selected kernel function to the data."""
    if self.kernel == 'linear':
        phi = linear_kernel(X, y)
    elif self.kernel == 'rbf':
        phi = rbf_kernel(X, y, gamma=self.gamma)
    elif self.kernel == 'poly':
        phi = polynomial_kernel(X, y, degree=self.degree)
    elif callable(self.kernel):
        phi = self.kernel(X, y)
        if len(phi.shape) != 2:
            raise ValueError(
                "Custom kernel function did not return 2D matrix"
            )
        if phi.shape[0] != X.shape[0]:
            raise ValueError(
                "Custom kernel function did not return matrix with rows"
                " equal to number of data points."
            )
    else:
        raise ValueError("Kernel selection is invalid.")
    phi = phi.T
    if self.bias_used:
        phi = np.hstack((np.ones((phi.shape[0], 1)), phi))
    return phi
def test_svc_decision_function():
    """
    Test SVC's decision_function

    Sanity check, test that decision_function implemented in python
    returns the same as the one in libsvm
    """
    # multi class:
    clf = svm.SVC(kernel="linear", C=0.1).fit(iris.data, iris.target)
    dec = np.dot(iris.data, clf.coef_.T) + clf.intercept_
    assert_array_almost_equal(dec, clf.decision_function(iris.data))

    # binary:
    clf.fit(X, Y)
    dec = np.dot(X, clf.coef_.T) + clf.intercept_
    prediction = clf.predict(X)
    assert_array_almost_equal(dec.ravel(), clf.decision_function(X))
    assert_array_almost_equal(
        prediction,
        clf.classes_[(clf.decision_function(X) > 0).astype(int)])
    expected = np.array([-1.0, -0.66, -1.0, 0.66, 1.0, 1.0])
    assert_array_almost_equal(clf.decision_function(X), expected, 2)

    # kernel binary:
    clf = svm.SVC(kernel="rbf", gamma=1)
    clf.fit(X, Y)
    rbfs = rbf_kernel(X, clf.support_vectors_, gamma=clf.gamma)
    dec = np.dot(rbfs, clf.dual_coef_.T) + clf.intercept_
    assert_array_almost_equal(dec.ravel(), clf.decision_function(X))
def test_nystroem_approximation():
    # some basic tests
    rnd = np.random.RandomState(0)
    X = rnd.uniform(size=(10, 4))

    # With n_components = n_samples this is exact
    X_transformed = Nystroem(n_components=X.shape[0]).fit_transform(X)
    K = rbf_kernel(X)
    assert_array_almost_equal(np.dot(X_transformed, X_transformed.T), K)

    trans = Nystroem(n_components=2, random_state=rnd)
    X_transformed = trans.fit(X).transform(X)
    assert_equal(X_transformed.shape, (X.shape[0], 2))

    # test callable kernel
    linear_kernel = lambda X, Y: np.dot(X, Y.T)
    trans = Nystroem(n_components=2, kernel=linear_kernel, random_state=rnd)
    X_transformed = trans.fit(X).transform(X)
    assert_equal(X_transformed.shape, (X.shape[0], 2))

    # test that available kernels fit and transform
    kernels_available = kernel_metrics()
    for kern in kernels_available:
        trans = Nystroem(n_components=2, kernel=kern, random_state=rnd)
        X_transformed = trans.fit(X).transform(X)
        assert_equal(X_transformed.shape, (X.shape[0], 2))
def cartesian_affinities(data, distance=2.0, sigma=1.0):
    """
    Computes affinities between points using euclidean distance, and sets
    to 0 all affinities for which the points are further than a certain
    threshold apart.

    Parameters
    ----------
    data : array, shape (N, M)
        N instances of M-dimensional data.
    distance : float
        Distance threshold, above which all affinities are set to 0.
    sigma : float
        Sigma used to compute affinities.

    Returns
    -------
    A : array, shape (N, N)
        Symmetric affinity matrix.
    """
    A = pairwise.rbf_kernel(data, data, gamma=(1.0 / (2 * (sigma ** 2))))
    if distance > 0.0:
        distances = pairwise.pairwise_distances(data)
        A[np.where(distances > distance)] = 0.0
    return A
def test_decision_function():
    # Test decision_function
    # Sanity check, test that decision_function implemented in python
    # returns the same as the one in libsvm
    # multi class:
    clf = svm.SVC(kernel='linear', C=0.1,
                  decision_function_shape='ovo').fit(iris.data, iris.target)

    dec = np.dot(iris.data, clf.coef_.T) + clf.intercept_
    assert_array_almost_equal(dec, clf.decision_function(iris.data))

    # binary:
    clf.fit(X, Y)
    dec = np.dot(X, clf.coef_.T) + clf.intercept_
    prediction = clf.predict(X)
    assert_array_almost_equal(dec.ravel(), clf.decision_function(X))
    assert_array_almost_equal(
        prediction,
        clf.classes_[(clf.decision_function(X) > 0).astype(int)])
    expected = np.array([-1., -0.66, -1., 0.66, 1., 1.])
    assert_array_almost_equal(clf.decision_function(X), expected, 2)

    # kernel binary:
    clf = svm.SVC(kernel='rbf', gamma=1, decision_function_shape='ovo')
    clf.fit(X, Y)
    rbfs = rbf_kernel(X, clf.support_vectors_, gamma=clf.gamma)
    dec = np.dot(rbfs, clf.dual_coef_.T) + clf.intercept_
    assert_array_almost_equal(dec.ravel(), clf.decision_function(X))
def test_spectral_embedding_callable_affinity(seed=36):
    # Test spectral embedding with callable affinity
    gamma = 0.9
    kern = rbf_kernel(S, gamma=gamma)
    se_callable = SpectralEmbedding(
        n_components=2,
        affinity=(lambda x: rbf_kernel(x, gamma=gamma)),
        gamma=gamma,
        random_state=np.random.RandomState(seed),
    )
    se_rbf = SpectralEmbedding(n_components=2, affinity="rbf",
                               gamma=gamma,
                               random_state=np.random.RandomState(seed))
    embed_rbf = se_rbf.fit_transform(S)
    embed_callable = se_callable.fit_transform(S)
    assert_array_almost_equal(se_callable.affinity_matrix_,
                              se_rbf.affinity_matrix_)
    assert_array_almost_equal(kern, se_rbf.affinity_matrix_)
    assert_true(_check_with_col_sign_flipping(embed_rbf, embed_callable, 0.05))
def _apply_kernel(self, x, y):
    """Apply the selected kernel function to the data."""
    if self.kernel == 'linear':
        phi = linear_kernel(x, y)
    elif self.kernel == 'rbf':
        phi = rbf_kernel(x, y, self.coef1)
    elif self.kernel == 'poly':
        phi = polynomial_kernel(x, y, self.degree, self.coef1, self.coef0)
    elif callable(self.kernel):
        phi = self.kernel(x, y)
        if len(phi.shape) != 2:
            raise ValueError(
                "Custom kernel function did not return 2D matrix"
            )
        if phi.shape[0] != x.shape[0]:
            raise ValueError(
                "Custom kernel function did not return matrix with rows"
                " equal to number of data points."
            )
    else:
        raise ValueError("Kernel selection is invalid.")
    if self.bias_used:
        phi = np.append(phi, np.ones((phi.shape[0], 1)), axis=1)
    return phi
def predict(X, y, gamma):
    # Nadaraya-Watson style kernel smoother: a weighted average of y with
    # RBF kernel weights between every pair of points in X
    K = rbf_kernel(X.reshape(-1, 1), X.reshape(-1, 1), gamma)
    pred = (K * y[:, None]).sum(axis=0) / K.sum(axis=0)
    return pred
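# A minimal usage sketch for predict above (1-D kernel smoothing on noisy
# sine data; the gamma value is illustrative only).
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

X = np.linspace(0, 2 * np.pi, 50)
y = np.sin(X) + 0.1 * np.random.RandomState(0).randn(50)
y_smooth = predict(X, y, gamma=5.0)  # kernel-weighted average at each point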
def fit(self, X, y, L):
    """Fit the model according to the given training data.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training data.
    y : array-like, shape = [n_samples]
        Target values (unlabeled points are marked as 0).
    L : array-like, shape = [n_samples, n_samples]
        Graph Laplacian.
    """
    labeled = y != 0
    y_labeled = y[labeled]
    n_samples, n_features = X.shape
    n_labeled_samples = y_labeled.size
    I = sp.eye(n_samples)
    J = sp.diags(labeled.astype(np.float64))
    K = rbf_kernel(X, gamma=self.gamma_k)
    M = J @ K \
        + self.gamma_a * n_labeled_samples * I \
        + self.gamma_i * n_labeled_samples / n_samples**2 * L**self.p @ K

    # Train a classifier
    self.dual_coef_ = LA.solve(M, y)

    return self
def _get_affinity_matrix(self, X, Y=None):
    """Calculate the affinity matrix from data

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples
        and n_features is the number of features.

        If affinity is "precomputed"
        X : array-like, shape (n_samples, n_samples),
        Interpret X as a precomputed adjacency graph computed from samples.

    Returns
    -------
    affinity_matrix, shape (n_samples, n_samples)
    """
    if self.affinity == 'precomputed':
        self.affinity_matrix_ = X
        print(type(self.affinity_matrix_))
        return self.affinity_matrix_
    # nearest_neighbors kept for backward compatibility
    if self.affinity == 'nearest_neighbors':
        if sparse.issparse(X):
            warnings.warn("Nearest neighbors affinity currently does "
                          "not support sparse input, falling back to "
                          "rbf affinity")
            self.affinity = "rbf"
        else:
            self.n_neighbors_ = (self.n_neighbors
                                 if self.n_neighbors is not None
                                 else max(int(X.shape[0] / 10), 1))
            self.affinity_matrix_ = kneighbors_graph(X, self.n_neighbors_)
            # currently only a symmetric affinity_matrix is supported
            self.affinity_matrix_ = 0.5 * (self.affinity_matrix_ +
                                           self.affinity_matrix_.T)
            return self.affinity_matrix_
    if self.affinity == 'radius_neighbors':
        if self.neighbors_radius is None:
            # another possible default: diam(X)/sqrt(dimensions)/10
            self.neighbors_radius_ = np.sqrt(X.shape[1])
        else:
            self.neighbors_radius_ = self.neighbors_radius
        self.gamma_ = (self.gamma
                       if self.gamma is not None else 1.0 / X.shape[1])
        self.affinity_matrix_ = radius_neighbors_graph(
            X, self.neighbors_radius_, mode='distance')
        self.affinity_matrix_.data **= 2
        self.affinity_matrix_.data /= -self.neighbors_radius_**2
        self.affinity_matrix_.data = np.exp(self.affinity_matrix_.data,
                                            self.affinity_matrix_.data)
        return self.affinity_matrix_
    if self.affinity == 'rbf':
        self.gamma_ = (self.gamma
                       if self.gamma is not None else 1.0 / X.shape[1])
        self.affinity_matrix_ = rbf_kernel(X, gamma=self.gamma_)
        return self.affinity_matrix_
    self.affinity_matrix_ = self.affinity(X)
    return self.affinity_matrix_
def __kernel_definition__(self):
    if self.Kf == 'rbf':
        return lambda X, Y: rbf_kernel(X, Y, self.rbf_gamma)
    if self.Kf == 'poly':
        return lambda X, Y: polynomial_kernel(X, Y, degree=self.poly_deg,
                                              gamma=None, coef0=self.poly_coeff)
    if self.Kf is None or self.Kf == 'linear':
        return lambda X, Y: linear_kernel(X, Y)
def calculate_affinities(data, neighborhood, sigma):
    """
    Calculates pairwise affinities for the data.

    Parameters
    ----------
    data : array, shape (N, M)
        Matrix of M-dimensional data points.
    neighborhood : float
        L2 distance threshold for computing affinities; anything outside
        of this threshold is set to 0.
    sigma : float
        Sigma value for computing affinities.

    Returns
    -------
    affinities : array, shape (N, N)
        Pairwise affinities for all data points.
    """
    affinities = pairwise.rbf_kernel(data, data,
                                     gamma=(1.0 / (2 * sigma * sigma)))
    distances = pairwise.pairwise_distances(data)
    # affinities and distances have the same dimensionality: (N, N)
    affinities[np.where(distances > neighborhood)] = 0.0
    return affinities
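# A minimal usage sketch for calculate_affinities above (synthetic 2-D points;
# the threshold and sigma values are illustrative only).
import numpy as np
from sklearn.metrics import pairwise

data = np.random.RandomState(0).rand(20, 2)
affinities = calculate_affinities(data, neighborhood=0.5, sigma=1.0)
assert affinities.shape == (20, 20)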
def test_spectral_embedding_deterministic():
    # Test that Spectral Embedding is deterministic
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    embedding_1 = spectral_embedding(sims)
    embedding_2 = spectral_embedding(sims)
    assert_array_almost_equal(embedding_1, embedding_2)
def test_pairwise_kernels_callable():
    # Test the pairwise_kernels helper function
    # with a callable function, with given keywords.
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((2, 4))

    metric = callable_rbf_kernel
    kwds = {'gamma': 0.1}
    K1 = pairwise_kernels(X, Y=Y, metric=metric, **kwds)
    K2 = rbf_kernel(X, Y=Y, **kwds)
    assert_array_almost_equal(K1, K2)

    # callable function, X=Y
    K1 = pairwise_kernels(X, Y=X, metric=metric, **kwds)
    K2 = rbf_kernel(X, Y=X, **kwds)
    assert_array_almost_equal(K1, K2)
def test_laplacian_eigenmap_deterministic():
    # Test that laplacian eigenmap is deterministic
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    embedding_1 = laplacian_eigenmap(sims)
    embedding_2 = laplacian_eigenmap(sims)
    assert_array_almost_equal(embedding_1, embedding_2)
def test_spectral_embeding_import():
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    assert_warns_message(DeprecationWarning,
                         "spectral_embedding is deprecated",
                         spectral_embedding, sims)
    assert_warns_message(DeprecationWarning,
                         "SpectralEmbedding is deprecated",
                         SpectralEmbedding)
def decision_function(self, X):
    if self.kernel == 'linear':
        f = np.dot(X, self._w) + self._b
    elif self.kernel == 'rbf':
        # rbf_kernel returns an array of shape (n_samples_X, n_samples_Y)
        assert self._ya.shape == (len(self._X), 1)
        f = np.sum(np.multiply(rbf_kernel(self._X, X), self._ya), axis=0) + self._b
        f = np.squeeze(np.array(f))
    return f
def test_spectral_embedding_precomputed_affinity(seed=36):
    # Test spectral embedding with precomputed kernel
    gamma = 1.0
    se_precomp = SpectralEmbedding(n_components=2, affinity="precomputed",
                                   random_state=np.random.RandomState(seed))
    se_rbf = SpectralEmbedding(n_components=2, affinity="rbf", gamma=gamma,
                               random_state=np.random.RandomState(seed))
    embed_precomp = se_precomp.fit_transform(rbf_kernel(S, gamma=gamma))
    embed_rbf = se_rbf.fit_transform(S)
    assert_array_almost_equal(se_precomp.affinity_matrix_,
                              se_rbf.affinity_matrix_)
    assert_true(_check_with_col_sign_flipping(embed_precomp, embed_rbf, 0.05))
def fit(self, X, y):
    t = time()
    # get labels for test data
    # build the graph; the result is the affinity matrix
    if self.kernel == 'dbscan' or self.kernel is None:
        affinity_matrix = self.dbscan(X, self.eps, self.minPts)
    # it is possible to use other kernels -> as parameter
    elif self.kernel == 'rbf':
        affinity_matrix = rbf_kernel(X, X, gamma=self.gamma)
    elif self.kernel == 'knn':
        affinity_matrix = NearestNeighbors(n_neighbors=self.naighbors).fit(X) \
            .kneighbors_graph(X, self.naighbors).toarray()
    else:
        raise ValueError("unknown kernel: %s" % self.kernel)
    print("graph(%s) time %2.3fms" % (self.kernel, (time() - t) * 1000))
    if affinity_matrix.max() == 0:
        print("no affinity matrix found")
        return y
    degree_matrix = np.diag(affinity_matrix.sum(axis=0))
    affinity_matrix = np.matrix(affinity_matrix)
    try:
        inverse_degree_matrix = np.linalg.inv(degree_matrix)
    except np.linalg.LinAlgError as err:
        if 'Singular matrix' in err.args:
            # fall back to the pseudo-inverse when the degree matrix is singular
            inverse_degree_matrix = np.linalg.pinv(degree_matrix)
        else:
            raise
    matrix = inverse_degree_matrix * affinity_matrix
    # split labels into separate vectors to calculate the propagation
    # for each label separately
    labels = np.unique(y)
    labels = [x for x in labels if x != self.unlabeledValue]
    # init yn1 and y0
    y0 = [[1 if (x == l) else 0 for x in y] for l in labels]
    yn1 = y0
    # function to set the probability to 1 if it was labeled in the source
    toOrgLabels = np.vectorize(lambda x, y: 1 if y == 1 else x, otypes=[np.intp])
    # function to set the indices of the source-labeled points
    toOrgLabelsIndex = np.vectorize(lambda x, y, z: z if y == 1 else x, otypes=[np.intp])
    lastLabels = np.argmax(y0, axis=0)
    while True:
        # first: matrix to labels
        yn1 = yn1 * matrix
        ynLablesIndex = np.argmax(yn1, axis=0)
        # row-normalize
        yn1 /= yn1.max()
        yn1 = toOrgLabels(yn1, y0)
        # second: original labels to result
        for x in y0:
            ynLablesIndex = toOrgLabelsIndex(ynLablesIndex, x, y0.index(x))
        if np.array_equiv(ynLablesIndex, lastLabels):
            break
        lastLabels = ynLablesIndex
    # result is the index of the labels -> cast index to the given labels
    toLabeles = np.vectorize(lambda x: labels[x])
    return np.array(toLabeles(lastLabels))[0]
def cls(mkl):
    for data in datasets:
        print("####################")
        print('# ', data)
        print("####################")
        # consider labels with more than 2%
        t = 0.02
        datadir = '../data/'
        km_dir = datadir + data + "/"
        if data == 'Fingerprint':
            kernels = ['PPKr', 'NB', 'CP2', 'NI', 'LB', 'CPC', 'RLB', 'LC',
                       'LI', 'CPK', 'RLI', 'CSC']
            km_list = []
            y = np.loadtxt(km_dir + "y.txt", ndmin=2)
            p = np.sum(y == 1, 0) / float(y.shape[0])
            y = y[:, p > t]
            for k in kernels:
                km_f = datadir + data + ("/%s.txt" % k)
                km_list.append(center(normalize_km(np.loadtxt(km_f))))
            pred_f = "../ovkr_result/pred/%s_cvpred_%s.npy" % (data, mkl)
            pred = ovkr_mkl(km_list, y, mkl, 5, data, data)
            np.save(pred_f, pred)
        elif data in image_datasets:
            y = np.loadtxt(km_dir + "y.txt", ndmin=2)
            p = np.sum(y == 1, 0) / float(y.shape[0])
            y = y[:, p > t]
            linear_km_list = []
            for i in range(1, 16):
                name = 'kernel_linear_%d.txt' % i
                km_f = km_dir + name
                km = np.loadtxt(km_f)
                # normalize input kernel !!!!!!!!
                linear_km_list.append(center(normalize_km(km)))
            pred_f = "../ovkr_result/pred/%s_cvpred_%s.npy" % (data, mkl)
            pred = ovkr_mkl(linear_km_list, y, mkl, 5, data, data)
            np.save(pred_f, pred)
        else:
            rbf_km_list = []
            gammas = [2**-13, 2**-11, 2**-9, 2**-7, 2**-5, 2**-3, 2**-1,
                      2**1, 2**3]
            X = np.loadtxt(km_dir + "/x.txt")
            scaler = preprocessing.StandardScaler().fit(X)
            X = scaler.transform(X)
            X = preprocessing.normalize(X)
            y = np.loadtxt(km_dir + "y.txt")
            p = np.sum(y == 1, 0) / float(y.shape[0])
            y = y[:, p > t]
            for gamma in gammas:
                km = rbf_kernel(X, gamma=gamma)
                # normalize input kernel !!!!!!!!
                rbf_km_list.append(center(km))
            pred_f = "../ovkr_result/pred/%s_cvpred_%s.npy" % (data, mkl)
            pred = ovkr_mkl(rbf_km_list, y, mkl, 5, data, data)
            np.save(pred_f, pred)
def test_svr_predict():
    # Test SVR's decision_function
    # Sanity check, test that predict implemented in python
    # returns the same as the one in libsvm

    X = iris.data
    y = iris.target

    # linear kernel
    reg = svm.SVR(kernel='linear', C=0.1).fit(X, y)
    dec = np.dot(X, reg.coef_.T) + reg.intercept_
    assert_array_almost_equal(dec.ravel(), reg.predict(X).ravel())

    # rbf kernel
    reg = svm.SVR(kernel='rbf', gamma=1).fit(X, y)
    rbfs = rbf_kernel(X, reg.support_vectors_, gamma=reg.gamma)
    dec = np.dot(rbfs, reg.dual_coef_.T) + reg.intercept_
    assert_array_almost_equal(dec.ravel(), reg.predict(X).ravel())
def test_average_dist(self):
    # 3 samples
    a = [1, 3]
    b = [0, 4]
    c = [2, 5]
    samples = np.array([a, b, c])
    dist = al.average_distance(samples)
    avg_a = (rbf_kernel([a], [b])[0][0] + rbf_kernel([a], [c])[0][0]) / 2
    avg_b = (rbf_kernel([b], [a])[0][0] + rbf_kernel([b], [c])[0][0]) / 2
    avg_c = (rbf_kernel([c], [a])[0][0] + rbf_kernel([c], [b])[0][0]) / 2
    self.assertAlmostEqual(dist[0], avg_a)
    self.assertAlmostEqual(dist[1], avg_b)
    self.assertAlmostEqual(dist[2], avg_c)
def test_kernel(self):
    # compute kernel with special rbf kernel
    # compute kernel with sklearn kernel
    # compute kernel between sparse_data_ and sparse_data
    # compute kernel between sparse_data and data
    # compute kernel between sparse_data and random_data
    # compute kernel between data and random_data
    # sklearn_kernel_first = rbf_kernel(self.data, self.data, self.gamma)
    # sklearn_kernel_verylittle = rbf_kernel(self.data_verylittle, self.data_verylittle)
    for name_pair, pair in self.pairs_data.items():
        data_norm = self.norm_data[name_pair]
        gamma = self.gamma_data[name_pair]
        sklearn_kernel = rbf_kernel(pair, pair, gamma=gamma)
        special_kernel = special_rbf_kernel(pair, pair, gamma=gamma,
                                            norm_X=data_norm,
                                            norm_Y=data_norm.T,
                                            exp_outside=False)
        special_kernel_flag = special_rbf_kernel(pair, pair, gamma=gamma,
                                                 norm_X=data_norm,
                                                 norm_Y=data_norm.T,
                                                 exp_outside=True)
        special_kernel[special_kernel < 1e-12] = 0
        special_kernel_flag[special_kernel_flag < 1e-12] = 0
        sklearn_kernel[sklearn_kernel < 1e-12] = 0
        equality = np.allclose(sklearn_kernel, special_kernel)
        equality_flag = np.allclose(sklearn_kernel, special_kernel_flag)
        delta = np.linalg.norm(special_kernel - sklearn_kernel)
        delta_flag = np.linalg.norm(special_kernel_flag - sklearn_kernel)
        print("Delta flag: {}; delta: {}".format(delta_flag, delta))
        self.assertTrue(delta_flag < delta)
        self.assertTrue(equality, msg=name_pair)
        self.assertTrue(equality_flag, msg=name_pair)
def get_kernel_matrix(X1, X2=None, kernel='rbf', gamma=1, degree=3, coef0=1):
    # Obtain an N1xN2 kernel matrix from N1xM and N2xM data matrices
    if kernel == 'rbf':
        K = pairwise.rbf_kernel(X1, X2, gamma=gamma)
    elif kernel == 'poly':
        K = pairwise.polynomial_kernel(X1, X2, degree=degree, gamma=gamma,
                                       coef0=coef0)
    elif kernel == 'linear':
        K = pairwise.linear_kernel(X1, X2)
    elif kernel == 'laplacian':
        K = pairwise.laplacian_kernel(X1, X2, gamma=gamma)
    elif kernel == 'chi2':
        K = pairwise.chi2_kernel(X1, X2, gamma=gamma)
    elif kernel == 'additive_chi2':
        K = pairwise.additive_chi2_kernel(X1, X2)
    elif kernel == 'sigmoid':
        K = pairwise.sigmoid_kernel(X1, X2, gamma=gamma, coef0=coef0)
    else:
        print('[Error] Unknown kernel')
        K = None
    return K
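# A minimal usage sketch for get_kernel_matrix above (random matrices;
# the gamma value is illustrative only).
import numpy as np
from sklearn.metrics import pairwise

X1 = np.random.RandomState(0).rand(10, 3)
X2 = np.random.RandomState(1).rand(5, 3)
K = get_kernel_matrix(X1, X2, kernel='rbf', gamma=0.5)  # shape (10, 5)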
def test_fastfood():
    """test that Fastfood fast approximates kernel on random data"""
    # compute exact kernel
    gamma = 10.
    kernel = rbf_kernel(X, Y, gamma=gamma)

    sigma = np.sqrt(1 / (2 * gamma))

    # approximate kernel mapping
    ff_transform = Fastfood(sigma, n_components=1000, random_state=42)

    pars = ff_transform.fit(X)
    X_trans = pars.transform(X)
    Y_trans = ff_transform.transform(Y)
    # print X_trans, Y_trans
    kernel_approx = np.dot(X_trans, Y_trans.T)

    print('approximation:', kernel_approx[:5, :5])
    print('true kernel:', kernel[:5, :5])
    assert_array_almost_equal(kernel, kernel_approx, decimal=1)
def _kernel(data, centers):
    """
    RBF kernel similarity between each point and each cluster center.
    (Note: despite the original docstring, this returns kernel similarities,
    not Euclidean distances.)

    Parameters
    ----------
    data : 2d array (N x Q)
        Data to be analyzed. There are N data points.
    centers : 2d array (C x Q)
        Cluster centers. There are C clusters, with Q features.

    Returns
    -------
    dist : 2d array (C x N)
        Kernel similarity of each point to each cluster center.
    """
    return rbf_kernel(data, centers, gamma=0.01).T
def __kernel_definition__(self):
    """Select the kernel function

    Returns
    -------
    kernel : a callable relative to selected kernel
    """
    if hasattr(self.kernel, '__call__'):
        return self.kernel
    if self.kernel == 'rbf' or self.kernel is None:
        return lambda X, Y: rbf_kernel(X, Y, self.rbf_gamma)
    if self.kernel == 'poly':
        return lambda X, Y: polynomial_kernel(X, Y, degree=self.degree,
                                              gamma=self.rbf_gamma,
                                              coef0=self.coef0)
    if self.kernel == 'linear':
        return lambda X, Y: linear_kernel(X, Y)
    if self.kernel == 'precomputed':
        return lambda X, Y: X
def test_spectral_embedding_unnormalized():
    # Test that spectral_embedding is also processing unnormalized laplacian
    # correctly
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    n_components = 8
    embedding_1 = spectral_embedding(sims,
                                     norm_laplacian=False,
                                     n_components=n_components,
                                     drop_first=False)

    # Verify using manual computation with dense eigh
    laplacian, dd = sparse.csgraph.laplacian(sims, normed=False,
                                             return_diag=True)
    _, diffusion_map = eigh(laplacian)
    embedding_2 = diffusion_map.T[:n_components] * dd
    embedding_2 = _deterministic_vector_sign_flip(embedding_2).T

    assert_array_almost_equal(embedding_1, embedding_2)
def similarity_regression(X, y, n_neighbors=None):
    """
    Calculates similarity based on labels, using X (data) and y (labels).

    Nearest neighbours are found on y, and the RBF kernel on X supplies the
    similarity; pairs of observations that are not neighbours in the kNN
    sense have their similarity set to 0.

    We can determine "distance" based on clusters: if we build a cluster
    around an observation, which other observations are closest?
    """
    from sklearn.neighbors import NearestNeighbors
    if n_neighbors is None:
        n_neighbors = max(int(X.shape[0] * 0.05) + 1, 2)
    # use NearestNeighbors to determine the closest observations
    y_ = np.array(y).reshape(-1, 1)
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto').fit(y_)
    return np.multiply(nbrs.kneighbors_graph(y_).toarray(),
                       rbf_kernel(X, gamma=1))
def MAO_lambda_Diversity(idx, yp, ssc_method="none", lam=0.6):
    # MAO lambda: trade-off between uncertainty and diversity
    K = rbf_kernel(active.xunlab, gamma=active.gamma)  # provisional kernel
    Sidx = np.zeros(query_points, dtype=type(idx[0]))
    for j in np.arange(query_points):
        # Add the first point, and remove it from the pool
        Sidx[j] = idx[0]
        idx = idx[1:]
        # Compute distances (kernel matrix)
        # Distances between selected samples (Sidx) and the rest (idx)
        Kdist = np.abs(K[Sidx[0:j + 1], :][:, idx])
        # Obtain the minimum distance for each column
        Kdist = Kdist.min(axis=0)
        # Trade-off between the AL algorithm and diversity
        if ssc_method == "ssc":
            heuristic = yp[idx, -1] * lam + Kdist * (1 - lam)
        elif ssc_method == "none":
            heuristic = yp[idx] * lam + Kdist * (1 - lam)
        idx = idx[heuristic.argsort()]  # axis=0
    # Move selected samples from unlabeled set to labeled set
    return active.updateLabels(Sidx)
def kernel(x, k=0):
    n = len(x)
    # create matrices A, D and L
    A = rbf_kernel(x, gamma=0.55)
    A[np.arange(n), np.arange(n)] = 0
    D = np.diag(A.sum(axis=0)**(-0.5))
    L = D @ A @ D
    # find eigenpairs and take the k biggest ones
    l, v = eig(L)
    i = np.flip(l.argsort())
    k = k or find_gap(l[i])
    i = i[:k]
    # create new, normalised representation of data points
    x_ = v[:, i]
    n = norm(x_, axis=1, keepdims=True)
    x_ = x_ / n
    k = linear_kernel(x_)
    return k
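# A minimal usage sketch for kernel above (spectral-embedding-style kernel).
# It assumes module-level eig, norm, rbf_kernel and linear_kernel as used by
# the function; passing k explicitly avoids the find_gap helper (not shown).
import numpy as np
from numpy.linalg import eig, norm
from sklearn.metrics.pairwise import rbf_kernel, linear_kernel

x = np.random.RandomState(0).rand(30, 2)
K = kernel(x, k=2)  # (30, 30) kernel over the 2-D spectral embedding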
def transform(self, X):
    nt = X.shape[0]
    if self._kernel_type == 'rbf':
        K = rbf_kernel(X, self._X, gamma=self._gamma)
    elif self._kernel_type == 'poly':
        K = polynomial_kernel(X, self._X, degree=self._degree,
                              coef0=self._coef0)
    elif self._kernel_type == 'linear':
        K = linear_kernel(X, self._X)
    if self._centred:
        # centre the test kernel with respect to the training kernel
        K1 = K - 1. / self._n * np.ones((nt, self._n)).dot(self._K)
        K2 = np.eye(self._n) - 1. / self._n * np.ones((self._n, self._n))
        Ko = K1.dot(K2)
    else:
        Ko = K
    return Ko
def _rbf_kernel(self, data_validation, data_training):
    """Radial basis function

    Parameters
    ----------
    data_validation : ndarray
        Validation data
    data_training : ndarray
        Training data

    Returns
    -------
    kernel : ndarray
        Kernel similarity matrix
    """
    if isinstance(self.gamma, str):
        gamma = 1 / data_training.shape[1]
    else:
        gamma = self.gamma
    return rbf_kernel(data_validation, data_training, gamma)
def get_RBF(A, s=1.):
    """
    Compute radial basis function kernel.

    Parameters:
        A -- Feature matrix.
        s -- Scale parameter (positive float, 1.0 by default).

    Return:
        K -- Radial basis function kernel matrix.
    """
    from sklearn.metrics.pairwise import euclidean_distances, rbf_kernel
    from sklearn.preprocessing import scale

    A = scale(A)
    dist_matrix = euclidean_distances(A, A, None, squared=True)
    dist_vector = dist_matrix[np.nonzero(np.tril(dist_matrix))]
    dist_median = np.median(dist_vector)
    # note: the third positional argument of rbf_kernel is gamma, so the
    # median squared distance (times s) is used directly as gamma here
    K = rbf_kernel(A, None, dist_median * s)
    return K
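# A minimal usage sketch for get_RBF above (random features; s=1.0 keeps the
# plain median-based scale; get_RBF expects module-level numpy as np).
import numpy as np

A = np.random.RandomState(0).rand(15, 4)
K = get_RBF(A, s=1.0)  # (15, 15) RBF kernel matrix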
def extract_batch(self, inp, idx_i, idx_j, batch_X):
    if self.cached_rawbatch is None or self.cached_rawbatch.shape[0] < batch_X.shape[0]:
        self.cached_rawbatch = np.zeros(
            (batch_X.shape[0], len(self.fext)), self.fext.dtype)
        rawbatch = self.cached_rawbatch
    else:
        rawbatch = self.cached_rawbatch[:idx_i.shape[0]]
    self.fext.extract_batch(inp, idx_i, idx_j, rawbatch)
    Xscaled = rawbatch.astype(np.float64)
    Xscaled -= self.mean
    Xscaled /= self.std
    if self.kernel == 'poly':
        K_y = fast_poly_kernel(Xscaled, self.basis,
                               degree=self.degree).astype(self.dtype)
    else:
        K_y = rbf_kernel(Xscaled, self.basis,
                         gamma=self.gamma).astype(self.dtype)
    batch_X[:] = self.phi_map.dot(K_y.T).T
def Gram_Matrix(Kernel, X_set, Degree, Gamma):
    print("Computing Gram Matrix...")
    Gram = np.zeros(shape=(X_set.shape[0], X_set.shape[0]))
    for i in range(0, (X_set.shape[0])):
        for j in range(0, (X_set.shape[0])):
            # pairwise kernels expect 2-D inputs, so reshape each sample row
            x_i = X_set[i].reshape(1, -1)
            x_j = X_set[j].reshape(1, -1)
            if Kernel == 'poly':
                Gram[i, j] = polynomial_kernel(x_i, x_j, Degree)[0, 0]
            elif Kernel == 'rbf':
                Gram[i, j] = rbf_kernel(x_i, x_j, Gamma)[0, 0]
            elif Kernel == 'linear':
                Gram[i, j] = polynomial_kernel(x_i, x_j, Degree, coef0=0)[0, 0]
    # Use the following instruction to fix the Gram matrix symmetry problem
    Gram = np.maximum(Gram, Gram.transpose())
    # Use the following instruction to fix CPLEX Error 5002 (objective is not convex)
    # (the original tested an undefined global set_kernel here; the Kernel
    # parameter is clearly what was meant)
    if Kernel == 'poly' or Kernel == 'rbf':
        Gram = Gram + np.identity(Gram.shape[1])
    print("Done")
    return Gram
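# A minimal usage sketch for Gram_Matrix above (small synthetic set; the
# element-wise double loop is quadratic, so keep the set small).
import numpy as np
from sklearn.metrics.pairwise import polynomial_kernel, rbf_kernel

X_set = np.random.RandomState(0).rand(6, 3)
G = Gram_Matrix('rbf', X_set, Degree=3, Gamma=0.5)  # (6, 6), symmetric, regularized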
def choose_article(self, selected_user, article_pool, time):
    rbf_row = rbf_kernel(self.user_features[selected_user].reshape(1, -1),
                         self.user_features)
    neighbors = np.argsort(rbf_row)[0][self.user_num - self.k:]
    # neighbors = self.neighbors[selected_user]
    neighbors = list(set(neighbors) & set(self.served_users))
    if len(neighbors) == 0:
        self.user_cluster_features[selected_user] = self.user_features[selected_user]
    else:
        if len(neighbors) == 1:
            weights = [1]
        else:
            weights = rbf_row[0][neighbors] / np.sum(rbf_row[0][neighbors])
        self.user_cluster_features[selected_user] = np.average(
            self.user_features[neighbors], weights=weights, axis=0)
    mean = np.dot(self.artificial_article_features[article_pool],
                  self.user_cluster_features[selected_user])
    temp1 = np.dot(self.artificial_article_features[article_pool],
                   np.linalg.inv(self.cluster_cor_matrix[selected_user]))
    temp2 = np.sum(temp1 * self.artificial_article_features[article_pool],
                   axis=1) * np.log(time + 1)
    var = np.sqrt(temp2)
    pta = mean + self.alpha * var
    article_picked = np.argmax(pta)
    article_picked = article_pool[article_picked]
    return article_picked, neighbors
def load_data(self, X, y, gamma=None, docalkernel=False, savefile=None,
              testfile=None, dobin=False):
    self.X = X
    if dobin:
        bins = [-1.0, -0.67, -0.33, 0, 0.33, 0.67, 1.0]
        # bins = [-1.0, 0, 1.0]
        binned = np.digitize(self.X, bins)
        self.X = np.array([bins[binned[i, j] - 1]
                           for i in range(np.shape(self.X)[0])
                           for j in range(np.shape(self.X)[1])]).reshape(np.shape(self.X))
    self.y = y
    if testfile is not None:
        dat2 = load_svmlight_file(testfile)
        self.testX = dat2[0].todense()
        if dobin:
            bins = [-1.0, -0.67, -0.33, 0, 0.33, 0.67, 1.0]
            binned = np.digitize(self.testX, bins)
            self.testX = np.array([bins[binned[i, j] - 1]
                                   for i in range(np.shape(self.testX)[0])
                                   for j in range(np.shape(self.testX)[1])]).reshape(np.shape(self.testX))
        self.testy = dat2[1]
    # print np.shape(self.X)
    self.gamma = gamma
    self.kernel = rbf_kernel(self.X, gamma=gamma)
def lalign_kernel(X, y, kernel="rbf", alpha=2, sigma=5):
    # X: np array of shape (n_samples, n_features)
    # returns E_(x,x')~p (kernel(y, y') * ||f(x) - f(x')||_2**alpha)
    from sklearn.metrics import pairwise_distances
    from sklearn.metrics.pairwise import rbf_kernel

    if kernel == "rbf":
        kernel = lambda y1, y2: rbf_kernel(y1, y2, gamma=1. / (2 * sigma**2))
    else:
        assert hasattr(kernel, '__call__'), 'kernel must be a callable'
    if len(X.shape) == 3:
        # Merge the first 2 dimensions
        y = y.reshape(y.shape[0] * y.shape[1], -1)
        X = X.reshape(X.shape[0] * X.shape[1], -1)
    assert len(y) == len(X)
    weights = kernel(y, y)  # (n_samples, n_samples)
    weights = (1 - np.eye(len(weights))) * weights
    weights /= weights.sum(axis=1)
    dist_matrix = pairwise_distances(
        X / np.linalg.norm(X, 2, axis=1, keepdims=True),
        metric='euclidean')**alpha
    dist = (dist_matrix * weights).sum() / weights.sum()
    return dist
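# A minimal usage sketch for lalign_kernel above (synthetic embeddings and
# continuous targets; all names and sizes here are illustrative only;
# the function also expects module-level numpy as np).
import numpy as np

X = np.random.RandomState(0).randn(64, 16)  # embeddings
y = np.random.RandomState(1).randn(64, 1)   # continuous targets
d = lalign_kernel(X, y, kernel="rbf", alpha=2, sigma=5)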
def prepare(self, X, y, gamma=0.7, δ_p=1e-6, δ_n=1e-6):
    '''
    compute extra information for the gaussian kernel method

    Parameters:
        X (np.array): 2D matrix containing the whole dataset; rows are
            samples, columns are variables
        y (np.array): 1D array containing the labels of the whole dataset
        gamma (float): a constant used in the rbf kernel, check
            sklearn.metrics.pairwise.rbf_kernel
        δ_p/δ_n (float): regularization terms that ensure the S++ property
            of the kernel covariance matrices F
    '''
    X_p, y_p, w_p, X_n, y_n, w_n = super().prepare(X, y)
    self.gamma = gamma
    self.m_p = len(y_p)
    self.m_n = len(y_n)
    self.J_p = np.zeros((self.m_p + self.m_n, self.m_p + self.m_n))
    self.J_p[0:self.m_p, 0:self.m_p] = 1 / np.sqrt(self.m_p) * (
        np.identity(self.m_p) - 1 / self.m_p * np.ones((self.m_p, self.m_p)))
    self.J_n = np.zeros((self.m_p + self.m_n, self.m_p + self.m_n))
    self.J_n[self.m_p:, self.m_p:] = 1 / np.sqrt(self.m_n) * (
        np.identity(self.m_n) - 1 / self.m_n * np.ones((self.m_n, self.m_n)))
    self.g_p = np.zeros(self.m_p + self.m_n)
    self.g_p[0:self.m_p] = 1 / self.m_p * np.ones(self.m_p)
    self.g_n = np.zeros(self.m_p + self.m_n)
    self.g_n[self.m_p:] = 1 / self.m_n * np.ones(self.m_n)
    self.X_combine = np.append(X_p, X_n, axis=0)
    self.G = rbf_kernel(X=self.X_combine, Y=self.X_combine, gamma=self.gamma)
    self.F_p = self.G @ self.J_p @ self.J_p.T @ self.G + δ_p * self.G
    self.F_n = self.G @ self.J_n @ self.J_n.T @ self.G + δ_n * self.G
    self.F_p_sqrt = np.real(sqrtm(self.F_p))
    self.F_n_sqrt = np.real(sqrtm(self.F_n))
    # set up variables for cvxpy
    self.α_kernel = cp.Variable(shape=(len(self.G), 1))
def _build_kernel(x, kernel, gamma=None):
    if kernel in {'pearson', 'spearman'}:
        if kernel == 'spearman':
            x = np.apply_along_axis(rankdata, 1, x)
        return np.corrcoef(x)
    if kernel in {'cosine', 'normalized_angle'}:
        x = 1 - squareform(pdist(x, metric='cosine'))
        if kernel == 'normalized_angle':
            x = 1 - np.arccos(x, x) / np.pi
        return x
    if kernel == 'gaussian':
        if gamma is None:
            gamma = 1 / x.shape[1]
        return rbf_kernel(x, gamma=gamma)
    if callable(kernel):
        return kernel(x)
    raise ValueError("Unknown kernel '{0}'.".format(kernel))
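# A minimal usage sketch for _build_kernel above. It assumes module-level
# rankdata (scipy.stats), pdist/squareform (scipy.spatial.distance),
# rbf_kernel (sklearn) and numpy as np, as the function does.
import numpy as np
from scipy.stats import rankdata
from scipy.spatial.distance import pdist, squareform
from sklearn.metrics.pairwise import rbf_kernel

x = np.random.RandomState(0).rand(8, 5)
K = _build_kernel(x, kernel='gaussian')  # (8, 8) RBF affinity, gamma = 1/n_features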
def estimate_density(self, X):
    model = self.model
    if self.algo == 'kde':
        # model: kde scikit-learn
        self.density = np.exp(model.score_samples(X))
    elif self.algo == 'mom-kde':
        # model: list of scikit-learn kdes
        z = []
        for k in range(len(model)):
            kde_k = model[k]
            z.append(np.exp(kde_k.score_samples(X)))
        self.density = np.median(z, axis=0)
    elif self.algo == 'rkde':
        # model: weights vector w
        n_samples, d = self.X_data.shape
        m = X.shape[0]
        K_plot = np.zeros((m, n_samples))
        for i_d in range(d):
            temp_xpos = X[:, i_d].reshape((-1, 1))
            temp_x = self.X_data[:, i_d].reshape((-1, 1))
            K_plot = K_plot + (np.dot(np.ones((m, 1)), temp_x.T) -
                               np.dot(temp_xpos, np.ones((1, n_samples))))**2
        K_plot = kde_lib.gaussian_kernel(K_plot, self.bandwidth, d)
        z = np.dot(K_plot, model)
        self.density = z
    elif self.algo == 'spkde':
        # model: weights vector a
        d = self.X_data.shape[1]
        gamma = 1. / (2 * (self.bandwidth**2))
        GG = rbf_kernel(self.X_data, X, gamma=gamma) * (
            2 * np.pi * self.bandwidth**2)**(-d / 2.)
        z = np.zeros((X.shape[0]))
        for j in range(X.shape[0]):
            for i in range(len(model)):
                z[j] += model[i] * GG[i, j]
        self.density = z
    else:
        print('no algo specified')
def gradForm2(Data, W, w, gamma, K1=None):
    # gradient of L = 1/2N (k(x1,x2) - cos(Wx1)cos(Wx2))
    import numpy as np
    from sklearn.metrics import pairwise

    Nexp = np.shape(W)[1]
    Ndata, Nfeat = np.shape(Data)
    if K1 is None:
        K1 = pairwise.rbf_kernel(Data, gamma=gamma)
    C = np.zeros((Nfeat, Ndata))
    B = np.zeros((Nfeat, Ndata))
    for i in range(Ndata):
        X = np.tile(Data[i, ], (Ndata, 1))
        K = K1[i, :]
        K = K[:, np.newaxis]
        #
        c1 = np.outer(np.cos(np.dot(Data, w)) * K,
                      np.sin(np.dot(Data[i, ], w)))
        c2 = np.outer(np.sin(np.dot(Data, w)) * K,
                      np.cos(np.dot(Data[i, ], w)))
        C[:, i] = np.squeeze((np.dot((X.T), c1) + np.dot((Data.T), c2)))
        #
        AK = np.dot(np.cos(np.dot(Data[i, ], W)), np.cos(np.dot(Data, W)).T)
        AK = AK[:, np.newaxis]
        #
        b1 = np.outer(np.cos(np.dot(Data, w)) * AK,
                      np.sin(np.dot(Data[i, ], w)))
        b2 = np.outer(np.sin(np.dot(Data, w)) * AK,
                      np.cos(np.dot(Data[i, ], w)))
        B[:, i] = np.squeeze((np.dot((X.T), b1) + np.dot((Data.T), b2)))
    #
    L = (1.0 / Ndata**2) * (np.sum(C, axis=1) - (2.0 / Nexp) * np.sum(B, axis=1))
    L = L[:, np.newaxis]
    return L
def Lossfunction(Data, W, gamma, K=None, w=None):
    import numpy as np
    from sklearn.metrics import pairwise

    if w is not None:
        W = np.concatenate((W, w), axis=1)
    Nexp = np.shape(W)[1]
    Ndata, Nfeat = np.shape(Data)
    # if the kernel is not provided, compute it
    if K is None:
        K = pairwise.rbf_kernel(Data, gamma=gamma)
    Phi = np.cos(np.dot(Data, W))
    AK = (2.0 / Nexp) * np.dot(Phi, Phi.T)
    L = np.sum((K - AK)**2) / Ndata**2
    # L = np.linalg.norm(K - AK) / np.linalg.norm(K)
    return L
def kernel_matrix(X, sigma, kernel, pkDegree, c0):
    print("Calculating Kernel matrix")
    # The value of sigma is very important, and an objective of this
    # research; a default value is used here.
    # Get the dimension N of the square distance matrix
    N = X.shape[0]
    # Initialise the kernel matrix with zeros
    K = np.zeros((N, N))
    if kernel == 'gaussian':
        gamma = 0.5 / sigma**2
        K = rbf_kernel(X, gamma=gamma)
    elif kernel == 'laplacian':
        gamma = 1 / sigma
        K = laplacian_kernel(X, gamma=gamma)
    elif kernel == 'linear':
        K = linear_kernel(X)
    elif kernel == 'polynomial':
        K = polynomial_kernel(X, gamma=sigma, degree=pkDegree, coef0=c0)
    return K
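# A minimal usage sketch for kernel_matrix above (random data; the sigma,
# degree and coef0 values are illustrative only).
import numpy as np
from sklearn.metrics.pairwise import (rbf_kernel, laplacian_kernel,
                                      linear_kernel, polynomial_kernel)

X = np.random.RandomState(0).rand(12, 3)
K = kernel_matrix(X, sigma=1.0, kernel='gaussian', pkDegree=3, c0=1)  # (12, 12)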
def GP(seq_length=30, num_samples=28*5*100, num_signals=1, scale=0.1,
       kernel='rbf', **kwargs):
    # the shape of the samples is num_samples x seq_length x num_signals
    samples = np.empty(shape=(num_samples, seq_length, num_signals))
    # T = np.arange(seq_length)/seq_length    # note, between 0 and 1
    T = np.arange(seq_length)    # note, not between 0 and 1
    if kernel == 'periodic':
        cov = periodic_kernel(T)
    elif kernel == 'rbf':
        cov = rbf_kernel(T.reshape(-1, 1), gamma=scale)
    else:
        raise NotImplementedError
    # scale the covariance
    cov *= 0.2
    # define the distribution
    mu = np.zeros(seq_length)
    print(np.linalg.det(cov))
    distribution = multivariate_normal(mean=np.zeros(cov.shape[0]), cov=cov)
    pdf = distribution.logpdf
    # now generate samples
    for i in range(num_signals):
        samples[:, :, i] = distribution.rvs(size=num_samples)
    return samples, pdf
def updating_weight(j, weights, X, u, v, m, variances, nominator):
    """
    j: weight vector dimension to update
    weights: weights vector
    X: data sample
    u: fuzzy membership degree
    v: centroid
    m: constant
    """
    denominator = 0.0
    quantile = np.divide(1, variances[j])
    for i in range(v.shape[0]):
        k_sum = 0
        for k in range(X.shape[0]):
            kernel = rbf_kernel(X[k, j].reshape(-1, 1),
                                v[i, j].reshape(-1, 1),
                                gamma=quantile)
            k_sum += np.multiply(np.power(u[i, k], m),
                                 np.multiply(2, 1 - kernel))
        denominator += k_sum
    return nominator / denominator
def updating_centroid(i, j, u, X, m, v, variances):
    """
    i: cluster position
    j: feature dimension
    u: fuzzy membership degree
    X: data sample
    m: constant
    v: centroid
    """
    nominators = []
    denominators = []
    quantile = np.divide(1, variances[j])
    for k in range(X.shape[0]):
        kernel = rbf_kernel(X[k, j].reshape(-1, 1),
                            v[i, j].reshape(-1, 1),
                            gamma=quantile)
        nominators.append(
            np.multiply(np.multiply(np.power(u[i, k], m), kernel), X[k, j]))
        denominators.append(np.multiply(np.power(u[i, k], m), kernel))
    nominators = np.array(nominators)
    denominators = np.array(denominators)
    return nominators.sum() / denominators.sum()
def UPDATE_PROBABILITY_PARTIAL(R, X_train, s):
    R = np.array(R)
    temp = R
    C_pie = rbf_kernel(X_train, X_train[R, :])
    k_pie = int(s / 2)
    # note: the singular values are bound to a new name (sv) so the parameter
    # s is not silently shadowed, as it was in the original
    ui_m, sv, vt = linalg.svd(C_pie, full_matrices=False)
    new_K = (ui_m[:, 0:k_pie]).dot(np.diag(1 / np.sqrt(sv[0:k_pie]))).dot(
        vt[0:k_pie, :])
    C_nys = new_K
    E = C_pie - C_nys
    temp_p = np.zeros((X_train.shape[0], 1)).reshape(-1, 1)
    for j in range(X_train.shape[0]):
        if j in temp:
            temp_p[j] = 0
        else:
            temp_p[j] = np.linalg.norm(E[j], ord=2)
    P = np.square(temp_p / np.sqrt(np.sum(np.square(temp_p))))
    return P