def svm_laplacian(training, labels, test, real):
    laplacian = laplacian_kernel(training)
    laplacian_test = laplacian_kernel(test, training)
    model = SVC(C=4, kernel='precomputed', max_iter=-1)
    model.fit(laplacian, labels)
    accuracy = model.score(laplacian_test, real)
    print(accuracy)
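
# A minimal usage sketch for svm_laplacian above, assuming these sklearn and
# numpy imports; the synthetic data, shapes, and split are illustrative, not
# from the source.
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics.pairwise import laplacian_kernel
from sklearn.model_selection import train_test_split

rng = np.random.RandomState(0)
X = rng.rand(200, 8)
y = (X.sum(axis=1) > 4).astype(int)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=0)
svm_laplacian(X_tr, y_tr, X_te, y_te)  # prints held-out accuracy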
def kernel_matrix(self, data_matrix, y, kernel_index, gamma, train=True):
    # Supported kernels: 'rbf', 'polynomial', 'laplacian', 'linear'.
    # At train time compute the square Gram matrix K(data_matrix, data_matrix);
    # otherwise compute the cross-kernel K(data_matrix, y).
    Y = None if train else y
    if kernel_index == 'rbf':
        K = rbf_kernel(data_matrix, Y, gamma=gamma)
    elif kernel_index == 'polynomial':
        K = polynomial_kernel(data_matrix, Y, gamma=gamma)
    elif kernel_index == 'laplacian':
        K = laplacian_kernel(data_matrix, Y, gamma=gamma)
    elif kernel_index == 'linear':
        K = linear_kernel(data_matrix, Y)
    return K
def kernelMatrix(self, X, y=None):
    # (disabled) Lasso-based feature selection for the linear kernel, kept
    # from the original:
    # if y is not None:
    #     if self.mu is None:
    #         # TODO: change to a classification model and allow choosing
    #         # between regression and classification
    #         reg = Lasso(self.param)
    #         self_mu = reg.fit(X, y).coef_
    #         self.Xtr = self.Xtr[:, np.where(self_mu != 0)]
    #         self.X = self.X[:, np.where(self_mu != 0)]
    if self.K_type == 'linear':
        K = linear_kernel(X, self.Xtr)
    elif self.K_type == 'polynomial':
        K = polynomial_kernel(X, self.Xtr, degree=self.param)
    elif self.K_type == 'gaussian':
        K = rbf_kernel(X, self.Xtr, gamma=self.param)
    elif self.K_type == 'laplacian':
        K = laplacian_kernel(X, self.Xtr, gamma=self.param)
    elif self.K_type == 'sigmoid':
        K = sigmoid_kernel(X, self.Xtr, gamma=self.param)
    else:
        raise ValueError("Unknown kernel type: %s" % self.K_type)
    self.K = normalize(K) if self.normalize else K
    return self.K
def evaluate_clf(X, Y, best_params_, n_splits, n_eval=10):
    # After grid search, the best parameters looked like
    # {'kernel': 'rbf', 'C': 100, 'gamma': 0.1}.
    accuracy = []
    for i in range(n_eval):
        if best_params_['kernel'] == 'linear':
            clf = svm.SVC(kernel='linear', C=best_params_['C'])
        elif best_params_['kernel'] == 'rbf':
            clf = svm.SVC(kernel='rbf', C=best_params_['C'],
                          gamma=best_params_['gamma'])
        elif best_params_['kernel'] == 'precomputed':  # the Laplacian case
            clf = svm.SVC(kernel='precomputed', C=best_params_['C'])
        else:
            raise Exception('Parameter Error')
        k_fold = StratifiedKFold(n_splits=n_splits, shuffle=True,
                                 random_state=i)
        if clf.kernel == 'precomputed':
            laplacekernel = laplacian_kernel(X, X, gamma=best_params_['gamma'])
            cvs = cross_val_score(clf, laplacekernel, Y, n_jobs=-1, cv=k_fold)
            print('CV Laplacian kernel')
        else:
            cvs = cross_val_score(clf, X, Y, n_jobs=-1, cv=k_fold)
        print(cvs)
        accuracy.append(cvs.mean())
    accuracy = np.array(accuracy)
    print('mean is %s, std is %s' % (accuracy.mean(), accuracy.std()))
    return (accuracy.mean(), accuracy.std())
def _get_kernel_matrix(self, X1, X2):
    # K is a len(X1)-by-len(X2) matrix
    if self._kernel == 'rbf':
        K = pairwise.rbf_kernel(X1, X2, gamma=self._gamma)
    elif self._kernel == 'poly':
        K = pairwise.polynomial_kernel(X1, X2, degree=self._degree,
                                       gamma=self._gamma, coef0=self._coef0)
    elif self._kernel == 'linear':
        K = pairwise.linear_kernel(X1, X2)
    elif self._kernel == 'laplacian':
        K = pairwise.laplacian_kernel(X1, X2, gamma=self._gamma)
    elif self._kernel == 'chi2':
        K = pairwise.chi2_kernel(X1, X2, gamma=self._gamma)
    elif self._kernel == 'additive_chi2':
        K = pairwise.additive_chi2_kernel(X1, X2)
    elif self._kernel == 'sigmoid':
        K = pairwise.sigmoid_kernel(X1, X2, gamma=self._gamma,
                                    coef0=self._coef0)
    else:
        print('[Error] Unknown kernel')
        K = None
    return K
def predict(X_train, X_test, y_train, y_test, train_sweights, weight_samples,
            gamma, C, dec_b):
    # Standardize the bin counts.
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    # Define the model; the Laplacian kernel bandwidth is scaled by the
    # feature dimension.
    comp_gram = lambda X, X_: laplacian_kernel(X, X_, gamma / X.shape[1])
    classifier = SVC(class_weight='balanced', random_state=1, kernel=comp_gram,
                     probability=True, C=C)
    if not weight_samples:
        classifier.fit(X_train, y_train)
    else:
        classifier.fit(X_train, y_train, train_sweights)
    y_pred_p = classifier.predict_proba(X_test)
    # Convert to a binary prediction using the specified decision boundary.
    y_pred = np.where(y_pred_p[:, 1] < dec_b, 0, 1)
    # For known labels compute scores, else return the prediction.
    if y_test is not None:
        pres, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred)
        return pres[1], recall[1], f1[1]
    else:  # this branch is entered when pred_X is passed
        return y_pred_p, y_pred
def _apply_kernel(self, x, y):
    """Apply the selected kernel function to the data."""
    if self.kernel == 'linear':
        phi = linear_kernel(x, y)
    elif self.kernel == 'rbf':
        phi = rbf_kernel(x, y, self.coef1)
    elif self.kernel == 'poly':
        phi = polynomial_kernel(x, y, self.degree, self.coef1, self.coef0)
    elif self.kernel == 'sigmoid':
        coef0 = self.coef0 if self.coef0 is not None else 1
        phi = sigmoid_kernel(x, y, self.gamma, coef0)
    elif self.kernel == 'chi2':
        gamma = self.gamma if self.gamma is not None else 1
        phi = chi2_kernel(x, y, gamma)
    elif self.kernel == 'laplacian':
        phi = laplacian_kernel(x, y, self.gamma)
    elif callable(self.kernel):
        phi = self.kernel(x, y)
        if len(phi.shape) != 2:
            raise ValueError(
                "Custom kernel function did not return 2D matrix")
        if phi.shape[0] != x.shape[0]:
            raise ValueError(
                "Custom kernel function did not return matrix with rows"
                " equal to number of data points.")
    else:
        raise ValueError("Kernel selection is invalid.")
    if self.bias_used:
        phi = np.append(phi, np.ones((phi.shape[0], 1)), axis=1)
    return phi
def chooseKernel(data, kerneltype='euclidean'):
    r"""Kernelize data (uses sklearn).

    Parameters
    ==========
    data : array of shape (n_individuals, n_dimensions)
        Data matrix.
    kerneltype : {'euclidean', 'cosine', 'laplacian', 'linear',
                  'polynomial_kernel', 'jaccard'}, optional
        Kernel type.

    Returns
    =======
    array of shape (n_individuals, n_individuals)
    """
    if kerneltype == 'euclidean':
        K = np.divide(1, (1 + pairwise_distances(data, metric="euclidean")))
    elif kerneltype == 'cosine':
        K = pairwise.cosine_similarity(data)
    elif kerneltype == 'laplacian':
        K = pairwise.laplacian_kernel(data)
    elif kerneltype == 'linear':
        K = pairwise.linear_kernel(data)
    elif kerneltype == 'polynomial_kernel':
        K = pairwise.polynomial_kernel(data)
    elif kerneltype == 'jaccard':
        K = 1 - distance.cdist(data, data, metric='jaccard')
    scaler = KernelCenterer().fit(K)
    return scaler.transform(K)
def min_corr_toeplitz(c2, tau=None, gamma0=1.):
    """For details, see here.

    Parameters
    ----------
    c2 : array, shape (n_, n_)
    tau : array, shape (n_,), optional
    gamma0 : float, optional

    Returns
    -------
    c2_star : array, shape (n_, n_)
    gamma_star : float
    """
    n_ = c2.shape[0]
    if tau is None:
        tau = np.arange(n_)
    tau = tau.reshape(n_, 1)

    # Step 1: Frobenius distance between the Toeplitz candidate and c2
    def func(g):
        return np.linalg.norm(laplacian_kernel(tau, tau, g) - c2, ord='fro')

    # Step 2: Calibrate the parameter gamma
    gamma_star = sp.optimize.minimize(func, gamma0, bounds=[(0, None)])['x'][0]

    # Step 3: Compute the Toeplitz correlation
    c2_star = laplacian_kernel(tau, tau, gamma_star)
    return c2_star, gamma_star
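
# A quick sketch exercising min_corr_toeplitz, assuming numpy/scipy plus the
# sklearn laplacian_kernel import used above; the perturbed target matrix is
# synthetic and purely illustrative.
import numpy as np
import scipy as sp
import scipy.optimize  # makes sp.optimize available inside min_corr_toeplitz
from sklearn.metrics.pairwise import laplacian_kernel

t = np.arange(10).reshape(-1, 1)
c2_noisy = laplacian_kernel(t, t, 0.3) + 0.01 * np.eye(10)  # ~ exp(-0.3|i-j|)
c2_star, gamma_star = min_corr_toeplitz(c2_noisy, gamma0=1.)
print(gamma_star)  # should land near 0.3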
def kernel_mean_matching(self, X, Z, kern='lin', B=1.0, eps=None):
    nx = X.shape[0]
    nz = Z.shape[0]
    print("nx: ", nx, " nz: ", nz)
    if eps is None:
        eps = B / math.sqrt(nz)
    if kern == 'lin':
        K = np.dot(Z, Z.T)
        K = K.todense()  # assumes Z is a scipy sparse matrix
        kappa = np.sum(np.dot(Z, X.T) * float(nz) / float(nx), axis=1)
    elif kern == 'rbf':
        K = sk.rbf_kernel(Z, Z)
        kappa = np.sum(sk.rbf_kernel(Z, X), axis=1) * float(nz) / float(nx)
    elif kern == 'poly':
        K = sk.polynomial_kernel(Z, Z)
        kappa = np.sum(sk.polynomial_kernel(Z, X), axis=1) * float(nz) / float(nx)
    elif kern == 'laplacian':
        K = sk.laplacian_kernel(Z, Z)
        kappa = np.sum(sk.laplacian_kernel(Z, X), axis=1) * float(nz) / float(nx)
    elif kern == 'sigmoid':
        K = sk.sigmoid_kernel(Z, Z)
        kappa = np.sum(sk.sigmoid_kernel(Z, X), axis=1) * float(nz) / float(nx)
    else:
        raise ValueError('unknown kernel')
    K = K.astype(np.double)
    K = matrix(K)
    kappa = matrix(kappa)
    # Constraints: nz*(1-eps) <= sum(w) <= nz*(1+eps) and 0 <= w_i <= B.
    G = matrix(np.r_[np.ones((1, nz)), -np.ones((1, nz)), np.eye(nz),
                     -np.eye(nz)])
    h = matrix(np.r_[nz * (1 + eps), nz * (eps - 1), B * np.ones((nz,)),
                     np.zeros((nz,))])
    print("starting solver")
    solvers.options['show_progress'] = False
    sol = solvers.qp(K, -kappa, G, h)
    print(sol)
    coef = np.array(sol['x'])
    return coef
def wwl(X, node_features=None, num_iterations=3, sinkhorn=False, gamma=None):
    """
    Pairwise computation of the Wasserstein Weisfeiler-Lehman kernel
    for graphs in X.
    """
    D_W = pairwise_wasserstein_distance(X, node_features=node_features,
                                        num_iterations=num_iterations,
                                        sinkhorn=sinkhorn)
    wwl = laplacian_kernel(D_W, gamma=gamma)
    return wwl
def laplFunc():
    if "width" in self.parameters["kernel"]:
        s = self.parameters["kernel"]["width"]
    else:
        s = 2
    K = smp.laplacian_kernel(X, Y, gamma=s)
    return K
def test_laplacian_kernel():
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    K = laplacian_kernel(X, X)
    # the diagonal elements of a laplacian kernel are 1
    assert_array_almost_equal(np.diag(K), np.ones(5))
    # off-diagonal elements are < 1 but > 0:
    assert np.all(K > 0)
    assert np.all(K - np.diag(np.diag(K)) < 1)
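
# The properties asserted above follow from the definition
# K(x, y) = exp(-gamma * ||x - y||_1). A minimal numeric check of that
# identity against scipy's cityblock distances (illustrative, not from the
# source):
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.metrics.pairwise import laplacian_kernel

X = np.random.RandomState(1).random_sample((5, 4))
gamma = 0.7
K = laplacian_kernel(X, X, gamma=gamma)
assert np.allclose(K, np.exp(-gamma * cdist(X, X, metric='cityblock')))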
def gram_laplacian_scipy(x):
    """Compute the Gram (kernel) matrix for a laplacian kernel.

    Args:
        x: A num_examples x num_features matrix of features.

    Returns:
        A num_examples x num_examples Gram matrix of examples.
    """
    K = laplacian_kernel(x)
    return K
def calc_gaussian_sim(data_matrix, method):
    if method == "rbf":
        return rbf_kernel(data_matrix)
    elif method == "chi2":
        return chi2_kernel(data_matrix)
    elif method == "laplacian":
        return laplacian_kernel(data_matrix)
    elif method == "sigmoid":
        return sigmoid_kernel(data_matrix)
    else:
        raise ValueError("Wrong method parameter in calc_gaussian_sim()")
def transform(self, X, Y):
    if self.type == 'rbf':
        return rbf_kernel(X, Y, self.gamma)[0]
    elif self.type == 'Chi2':
        return chi2_kernel(X, Y, self.gamma)[0]
    elif self.type == 'AChi2':
        return -additive_chi2_kernel(X, Y)[0]
    elif self.type == 'laplacian':
        return laplacian_kernel(X, Y, self.gamma)[0]
    elif self.type == 'sigmoid':
        return sigmoid_kernel(X, Y, self.gamma, self.coef0)[0]
def __init__(self, kernel_name='rbf', type='classification'):
    self.kernel_name = kernel_name
    self.type = type
    self.kernel_dict = {
        "rbf": lambda x, y=None: rbf_kernel(x, y),
        "linear": lambda x, y=None: linear_kernel(x, y),
        "add_chi2": lambda x, y=None: additive_chi2_kernel(x, y),
        "chi2": lambda x, y=None: chi2_kernel(x, y),
        "poly": lambda x, y=None: polynomial_kernel(x, y),
        "laplace": lambda x, y=None: laplacian_kernel(x, y),
    }
def fit(self, signal, n_batch=20):
    '''
    Computes the Gram matrix on the given signal.

    Parameters
    ----------
    signal (np.array): pre-processed signal for cp-detection
    n_batch (int): number of batches for batch-bandwidth calculation

    Returns
    -------
    self, including self.gram

    Note: can be computationally expensive for large signals, since the
    full Gram matrix is computed.
    '''
    signal -= np.mean(signal)
    if signal.ndim == 1:
        signal = signal.reshape(-1, 1)
    if self.bandwidth == 'median':
        # Median heuristic: bandwidth from the median pairwise distance.
        if self.kernel == 'laplace':
            sigma = np.median(pdist(signal, metric='cityblock'))
        elif self.kernel == 'gaussian':
            sigma = np.median(pdist(signal, metric='sqeuclidean'))
    elif self.bandwidth == 'sig_std':
        sigma = np.std(signal)
    elif self.bandwidth == 'sig_std_batch_max':
        n = int(signal.shape[0] / n_batch)
        batch_signal = [signal[i:i + n] for i in range(0, signal.shape[0], n)]
        std_ = [np.std(i) for i in batch_signal]
        sigma = np.max(std_)
    if self.kernel == 'linear':
        gram = linear_kernel(signal)
    elif self.kernel == 'laplace':
        gram = laplacian_kernel(signal, gamma=(1 / sigma))
    elif self.kernel == 'gaussian':
        gram = rbf_kernel(signal, gamma=(1 / sigma))
    self.gram = gram
    return self
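
# A standalone distillation of the median-heuristic branch of fit above,
# assuming only numpy/scipy/sklearn and no surrounding class; the random
# signal is illustrative.
import numpy as np
from scipy.spatial.distance import pdist
from sklearn.metrics.pairwise import laplacian_kernel

signal = np.random.RandomState(2).randn(100, 1)
sigma = np.median(pdist(signal, metric='cityblock'))  # median L1 distance
gram = laplacian_kernel(signal, gamma=1 / sigma)
print(gram.shape)  # (100, 100)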
def calculateMultipleKernel(x, y):
    theta = random.sample(range(1, 47), 46)  # given a random theta for now
    # Convert our 2d arrays to numpy arrays
    x = np.array(x)
    y = np.array(y)
    # Reshape the array-like input vectors since we only have one sample
    x = x.reshape(1, -1)
    y = y.reshape(1, -1)
    # Aggregate the weighted kernel results
    kernelResult = 0
    index = 0
    for i in range(0, 3):
        kernelResult += theta[index] * additive_chi2_kernel(x, y)
        index += 1
    for i in range(0, 3):
        kernelResult += theta[index] * chi2_kernel(x, y, theta[index + 1])
        index += 2
    for i in range(0, 3):
        kernelResult += theta[index] * cosine_similarity(x, y)
        index += 1
    for i in range(0, 3):
        kernelResult += theta[index] * linear_kernel(x, y)
        index += 1
    for i in range(0, 3):
        kernelResult += theta[index] * polynomial_kernel(
            x, y, theta[index + 1], theta[index + 2], theta[index + 3])
        index += 4
    for i in range(0, 3):
        kernelResult += theta[index] * rbf_kernel(x, y, theta[index + 1])
        index += 2
    for i in range(0, 3):
        kernelResult += theta[index] * laplacian_kernel(x, y, theta[index + 1])
        index += 2
    for i in range(0, 3):
        kernelResult += theta[index] * sigmoid_kernel(x, y, theta[index + 1])
        index += 2
    return kernelResult
def kernel_matrix(X, sigma, kernel):
    print("Calculating Kernel matrix")
    # Initialise the N x N kernel matrix with zeros.
    N = X.shape[0]
    K = np.zeros((N, N))
    if kernel == 'gaussian':
        gamma = 1 / (2 * (sigma ** 2))
        K = rbf_kernel(X, gamma=gamma)
    elif kernel == 'laplacian':
        gamma = 1 / sigma
        K = laplacian_kernel(X, gamma=gamma)
    return K
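
# The sigma-to-gamma conventions used above can be sanity-checked directly;
# this sketch (not from the source) verifies both mappings numerically:
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.metrics.pairwise import rbf_kernel, laplacian_kernel

X = np.random.RandomState(3).rand(20, 5)
sigma = 1.5
K_g = rbf_kernel(X, gamma=1 / (2 * sigma ** 2))
assert np.allclose(K_g, np.exp(-cdist(X, X, 'sqeuclidean') / (2 * sigma ** 2)))
K_l = laplacian_kernel(X, gamma=1 / sigma)
assert np.allclose(K_l, np.exp(-cdist(X, X, 'cityblock') / sigma))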
def kernel_function(self, x1, x2):
    features = []
    # linear kernels:
    # Cosine distance
    features.append(np.squeeze(1 - pairwise.paired_cosine_distances(x1, x2)[0]))
    # Manhattan distance
    features.append(pairwise.paired_manhattan_distances(x1, x2)[0])
    # Euclidean distance
    features.append(pairwise.paired_euclidean_distances(x1, x2)[0])
    # Chebyshev distance
    features.append(pairwise.pairwise_distances(x1, x2, metric="chebyshev")[0][0])
    # stat kernels:
    # Pearson coefficient
    pearson = stats.pearsonr(np.squeeze(np.asarray(x1)),
                             np.squeeze(np.asarray(x2)))[0]
    features.append(0 if np.isnan(pearson) else pearson)
    # Spearman coefficient
    spearman = stats.spearmanr(x1, x2, axis=1).correlation
    features.append(0 if np.isnan(spearman) else spearman)
    # Kendall tau coefficient
    kendall = stats.kendalltau(x1, x2).correlation
    features.append(0 if np.isnan(kendall) else kendall)
    # non-linear kernels:
    # polynomial
    features.append(pairwise.polynomial_kernel(x1, x2, degree=2)[0][0])
    # rbf
    features.append(pairwise.rbf_kernel(x1, x2)[0][0])
    # laplacian
    features.append(pairwise.laplacian_kernel(x1, x2)[0][0])
    # sigmoid
    features.append(pairwise.sigmoid_kernel(x1, x2)[0][0])
    return features
def get_singular_vals_kernels(weight_dict, kernel='cosine', activation=False):
    explained_var_dict = {}
    for layer_name in weight_dict.keys():
        if 'weight' in layer_name or activation:
            w = weight_dict[layer_name]  # w is output x input, so don't transpose
            if len(w.shape) > 2:  # conv layer
                w = w.reshape(w.shape[0] * w.shape[1], -1)
            if kernel == 'cosine':
                K = pairwise.cosine_similarity(w)
            elif kernel == 'rbf':
                K = pairwise.rbf_kernel(w)
            elif kernel == 'laplacian':
                K = pairwise.laplacian_kernel(w)
            pca = PCA()
            pca.fit(K)
            explained_var_dict[layer_name] = deepcopy(pca.singular_values_)
    return explained_var_dict
def dataPreProcess():
    filename = '/Users/guichengwu/Desktop/208_mid term/exam.dat'
    data = np.loadtxt(filename, dtype='str')
    for i in range(data.shape[0]):
        for j in range(1, data.shape[1]):
            data[i][j] = data[i][j][2:]
    data_matrix = np.matrix(data).astype(float)
    X = data_matrix[:, 1:5]
    Y = data_matrix[:, 0]
    X = preprocessing.scale(X)
    X = laplacian_kernel(X)
    # pca = decomposition.PCA(n_components=3)
    # pca.fit(X)
    # X = pca.transform(X)
    # NOTE: sklearn.cross_validation was removed in sklearn 0.20; on modern
    # versions use sklearn.model_selection.train_test_split instead.
    X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(
        X, Y, test_size=0.2)
    return X_train, X_test, Y_train, Y_test
def _kernel(self, X, Y=None):
    kernel = None
    if self.kernel == 'chi2':
        kernel = chi2_kernel(X, Y, gamma=self.gamma)
    elif self.kernel == 'laplacian':
        kernel = laplacian_kernel(X, Y, gamma=self.gamma)
    elif self.kernel == 'linear':
        kernel = linear_kernel(X, Y)
    elif self.kernel == 'polynomial':
        kernel = polynomial_kernel(X, Y, degree=self.degree, gamma=self.gamma,
                                   coef0=self.coef0)
    elif self.kernel == 'rbf':
        kernel = rbf_kernel(X, Y, gamma=self.gamma)
    elif self.kernel == 'sigmoid':
        kernel = sigmoid_kernel(X, Y, gamma=self.gamma, coef0=self.coef0)
    return kernel
def evaluate_sklearn_kernel(kernel_name, kernel_param, X, Y=None):
    # These names are consistent with sklearn's.
    if kernel_name not in ['linear', 'polynomial', 'rbf', 'laplacian']:
        raise Exception('Unrecognised kernel name \'' + kernel_name + '\'!')
    if kernel_name == 'linear':
        return linear_kernel(X=X, Y=Y)
    elif kernel_name == 'polynomial':
        (degree_param, gamma_param, coef0_param) = \
            get_polynomial_kernel_params(kernel_param=kernel_param)
        return polynomial_kernel(X=X, Y=Y, degree=degree_param,
                                 gamma=gamma_param, coef0=coef0_param)
    elif kernel_name == 'rbf':
        return rbf_kernel(X=X, Y=Y, gamma=kernel_param)
    else:  # Laplacian
        return laplacian_kernel(X=X, Y=Y, gamma=kernel_param)
def get_kernel_matrix(X1, X2=None, kernel='rbf', gamma=1, degree=3, coef0=1):
    # Obtain the N1 x N2 kernel matrix from N1 x M and N2 x M data matrices.
    if kernel == 'rbf':
        K = pairwise.rbf_kernel(X1, X2, gamma=gamma)
    elif kernel == 'poly':
        K = pairwise.polynomial_kernel(X1, X2, degree=degree, gamma=gamma,
                                       coef0=coef0)
    elif kernel == 'linear':
        K = pairwise.linear_kernel(X1, X2)
    elif kernel == 'laplacian':
        K = pairwise.laplacian_kernel(X1, X2, gamma=gamma)
    elif kernel == 'chi2':
        K = pairwise.chi2_kernel(X1, X2, gamma=gamma)
    elif kernel == 'additive_chi2':
        K = pairwise.additive_chi2_kernel(X1, X2)
    elif kernel == 'sigmoid':
        K = pairwise.sigmoid_kernel(X1, X2, gamma=gamma, coef0=coef0)
    else:
        print('[Error] Unknown kernel')
        K = None
    return K
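
# Quick shape check for get_kernel_matrix (illustrative data): the result is
# N1 x N2 even when the two inputs have different numbers of rows.
import numpy as np
from sklearn.metrics import pairwise

A = np.random.RandomState(5).rand(6, 4)
B = np.random.RandomState(6).rand(3, 4)
print(get_kernel_matrix(A, B, kernel='laplacian', gamma=0.5).shape)  # (6, 3)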
def kernel_matrix(X, sigma, kernel, pkDegree, c0):
    print("Calculating Kernel matrix")
    # The value of sigma matters greatly and is itself an objective of this
    # research; a default value is used here.
    # Initialise the N x N kernel matrix with zeros.
    N = X.shape[0]
    K = np.zeros((N, N))
    if kernel == 'gaussian':
        gamma = 0.5 / sigma ** 2
        K = rbf_kernel(X, gamma=gamma)
    elif kernel == 'laplacian':
        gamma = 1 / sigma
        K = laplacian_kernel(X, gamma=gamma)
    elif kernel == 'linear':
        K = linear_kernel(X)
    elif kernel == 'polynomial':
        K = polynomial_kernel(X, gamma=sigma, degree=pkDegree, coef0=c0)
    return K
def prediction(method, input_X, param=None):
    filename = '/Users/guichengwu/Desktop/208_mid term/exam.dat'
    data = np.loadtxt(filename, dtype='str')
    for i in range(data.shape[0]):
        for j in range(1, data.shape[1]):
            data[i][j] = data[i][j][2:]
    data_matrix = np.matrix(data).astype(float)
    X = data_matrix[:, 1:5]
    Y = data_matrix[:, 0]
    nrow = X.shape[0]
    # Stack the query points on the training data so that scaling and the
    # kernel map are computed consistently for both.
    X = np.vstack((X, input_X))
    X = preprocessing.scale(X)
    X = laplacian_kernel(X)
    input_X = X[nrow:X.shape[0], :]
    X_train = X[0:nrow, :]
    Y_train = Y
    return method_selection(method, X_train, Y_train, input_X, param)
def __init__(self, B=1000, epsilon=0.1, kernel="gaussian", gamma=None,
             coef0=1.0, degree=3):
    self._B = B
    self._epsilon = epsilon
    if kernel == "gaussian":
        self._kernel = lambda x, y: pw.rbf_kernel(x, y, gamma)
    elif kernel == "linear":
        self._kernel = pw.linear_kernel
    elif kernel == "polynomial":
        self._kernel = lambda x, y: pw.polynomial_kernel(x, y, degree, gamma,
                                                         coef0)
    elif kernel == "sigmoid":
        self._kernel = lambda x, y: pw.sigmoid_kernel(x, y, gamma, coef0)
    elif kernel == "laplacian":
        self._kernel = lambda x, y: pw.laplacian_kernel(x, y, gamma)
    else:
        # Assume a user-supplied callable kernel.
        self._kernel = kernel
def preprocessGraph():
    filename = '/Users/guichengwu/Desktop/208_mid term/exam.dat'
    data = np.loadtxt(filename, dtype='str')
    for i in range(data.shape[0]):
        for j in range(1, data.shape[1]):
            data[i][j] = data[i][j][2:]
    data_matrix = np.matrix(data).astype(float)
    X = data_matrix[:, 1:5]
    Y = np.asarray(data_matrix[:, 0])
    X = preprocessing.scale(X)
    X = laplacian_kernel(X)
    # Alternative kernel maps tried during exploration:
    # X = polynomial_kernel(X)
    # X = rbf_kernel(X)
    # X = sigmoid_kernel(X)
    pca = decomposition.PCA(n_components=3)
    pca.fit(X)
    X = pca.transform(X)

    data_fig1 = plt.figure(1, figsize=(8, 6))
    plt.clf()
    # Plot the training points
    plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)
    plt.xlabel('Projection Vector 1')
    plt.ylabel('Projection Vector 2')
    plt.show()
    data_fig1.savefig('/Users/guichengwu/Desktop/208_mid term/data_2d.png')

    data_fig2 = plt.figure(2)
    ax2 = data_fig2.add_subplot(111, projection='3d')
    ax2.scatter(np.asarray(X[:, 0]), np.asarray(X[:, 1]), np.asarray(X[:, 2]),
                c=Y, cmap=plt.cm.Paired)
    plt.show()
    data_fig2.savefig('/Users/guichengwu/Desktop/208_mid term/data_3d.png')
# kinects=['K1', 'K2', 'K3', 'K4', 'K5'],
# kernel_func_rgb=lambda X, L=None: pairwise.laplacian_kernel(X, L, gamma=0.00001),
# kernel_func_depth=lambda X, L=None: pairwise.laplacian_kernel(X, L, gamma=0.00001),
# kernel_func_concatenate=lambda X, L=None: pairwise.laplacian_kernel(X, L, gamma=0.001),
# C_mkl=100,
# C_concatenate=100,
# lam_mkl=0.5,
# late_fusion_weight_rgb=0.8,
# late_fusion_weight_depth=0.2
# )
laplacian_params_K1 = Params(
    name='laplacian',
    assignable_names=['lap', 'laplacian'],
    kinects=['K1'],
    kernel_func_mkl_rgb=lambda X, L=None: pairwise.laplacian_kernel(
        X, L, gamma=0.0001),
    kernel_func_mkl_of=lambda X, L=None: pairwise.laplacian_kernel(
        X, L, gamma=0.0001),
    kernel_func_mkl_depth=lambda X, L=None: pairwise.laplacian_kernel(
        X, L, gamma=0.0001),
    kernel_func_concatenate=lambda X, L=None: pairwise.laplacian_kernel(
        X, L, gamma=0.001),
    kernel_func_svm_rgb=lambda X, L=None: pairwise.laplacian_kernel(
        X, L, gamma=0.00001),
    kernel_func_svm_depth=lambda X, L=None: pairwise.laplacian_kernel(
        X, L, gamma=0.00001),
    C_mkl=100,
    C_concatenate=100,
    C_rgb=100,
    C_of=100,
    C_depth=100,
def drawAlgoCompGraph():
    h = 0.02
    names = ["ridge", "KNN", "Linear SVM", "RBF SVM", "LDA", "Random Forest",
             "AdaBoost", "Naive Bayes", "QDA", "Logistic"]
    kernel_names = ['laplacian kernel', 'RBF kernel', 'Sigmoid kernel']
    classifiers = [
        linear_model.Ridge(),
        KNeighborsClassifier(9),
        SVC(kernel="linear", C=0.025),
        SVC(kernel="rbf", gamma=0.25),
        LDA(),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        AdaBoostClassifier(),
        GaussianNB(),
        QDA(),
        linear_model.LogisticRegression()]

    filename = '/Users/guichengwu/Desktop/208_mid term/exam.dat'
    data = np.loadtxt(filename, dtype='str')
    for i in range(data.shape[0]):
        for j in range(1, data.shape[1]):
            data[i][j] = data[i][j][2:]
    data_matrix = np.matrix(data).astype(float)
    X = data_matrix[:, 1:5]
    y = np.asarray(data_matrix[:, 0])
    X = preprocessing.scale(X)

    # Project each kernel map to 2D with PCA for plotting.
    Lap_X = laplacian_kernel(X)
    pca1 = decomposition.PCA(n_components=2)
    pca1.fit(Lap_X)
    Lap_X = pca1.transform(Lap_X)

    RBF_X = rbf_kernel(X)
    pca2 = decomposition.PCA(n_components=2)
    pca2.fit(RBF_X)
    RBF_X = pca2.transform(RBF_X)

    Sig_X = sigmoid_kernel(X)
    pca3 = decomposition.PCA(n_components=2)
    pca3.fit(Sig_X)
    Sig_X = pca3.transform(Sig_X)

    linearly_separable1 = (Lap_X, y)
    linearly_separable2 = (RBF_X, y)
    linearly_separable3 = (Sig_X, y)
    datasets = [linearly_separable1, linearly_separable2, linearly_separable3]

    figure = plt.figure(figsize=(30, 10))
    i = 1
    for kernel_name, ds in zip(kernel_names, datasets):
        X, y = ds
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            test_size=.4)
        x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
        y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))
        cm = plt.cm.RdBu
        cm_bright = ListedColormap(['#FF0000', '#0000FF'])
        ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                   alpha=0.6)
        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
        ax.set_title(kernel_name)
        i += 1
        # iterate over classifiers
        for name, clf in zip(names, classifiers):
            ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
            clf.fit(X_train, y_train)
            score = clf.score(X_test, y_test)
            if hasattr(clf, "decision_function"):
                Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
            else:
                Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
            Z = Z.reshape(xx.shape)
            ax.contourf(xx, yy, Z, cmap=cm, alpha=0.8)
            ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train,
                       cmap=cm_bright)
            ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                       alpha=0.6)
            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_xticks(())
            ax.set_yticks(())
            ax.set_title(name)
            ax.text(xx.max() - 0.3, yy.min() + 0.3,
                    ('%.2f' % score).lstrip('0'), size=15,
                    horizontalalignment='right')
            i += 1
    figure.subplots_adjust(left=0.02, right=0.98)
    plt.show()
    figure.savefig('/Users/guichengwu/Desktop/algorithm_comparison2.png')
def kernel_mean_matching(self, X, Z, y_labels, kern='lin', B0=1.0, B1=1.0,
                         eps=None):
    nx = X.shape[0]
    nz = Z.shape[0]
    print("nx: ", nx, " nz: ", nz)
    print("B0: ", B0, " B1: ", B1)
    if eps is None:
        avg = (B0 + B1) * 1.0 / 2.0
        eps = avg / math.sqrt(nz)
    if kern == 'lin':
        K = np.dot(Z, Z.T)
        K = K.todense()  # assumes Z is a scipy sparse matrix
        kappa = np.sum(np.dot(Z, X.T) * float(nz) / float(nx), axis=1)
    elif kern == 'rbf':
        K = sk.rbf_kernel(Z, Z)
        kappa = np.sum(sk.rbf_kernel(Z, X), axis=1) * float(nz) / float(nx)
    elif kern == 'poly':
        K = sk.polynomial_kernel(Z, Z)
        kappa = np.sum(sk.polynomial_kernel(Z, X), axis=1) * float(nz) / float(nx)
    elif kern == 'laplacian':
        K = sk.laplacian_kernel(Z, Z)
        kappa = np.sum(sk.laplacian_kernel(Z, X), axis=1) * float(nz) / float(nx)
    elif kern == 'sigmoid':
        K = sk.sigmoid_kernel(Z, Z)
        kappa = np.sum(sk.sigmoid_kernel(Z, X), axis=1) * float(nz) / float(nx)
    else:
        raise ValueError('unknown kernel')
    K = K.astype(np.double)
    K = matrix(K)
    kappa = matrix(kappa)
    G = matrix(np.r_[np.ones((1, nz)), -np.ones((1, nz)), np.eye(nz),
                     -np.eye(nz)])
    # Per-sample upper bounds on the weights: B0 for class 1, B1 for class 0.
    true_label_max = np.argmax(y_labels, axis=1)
    updatedm = np.ones((nz,))
    updatedm[true_label_max == 1] = B0
    updatedm[true_label_max == 0] = B1
    h = matrix(np.r_[nz * (1 + eps), nz * (eps - 1), updatedm,
                     np.zeros((nz,))])
    solvers.options['show_progress'] = False
    print("starting solver")
    sol = solvers.qp(K, -kappa, G, h)
    coef = np.array(sol['x'])
    print(sol)
    return coef
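
# A hedged usage sketch for the weighted kernel_mean_matching variant above.
# It assumes an instance `adapter` of the surrounding class (hypothetical
# name) plus numpy and cvxopt being importable; the data, one-hot labels, and
# the downstream use of the weights are illustrative only.
import numpy as np

rng = np.random.RandomState(4)
X_target = rng.randn(50, 3)        # samples from the target distribution
Z_source = rng.randn(80, 3) + 0.5  # shifted source samples
y_onehot = np.eye(2)[rng.randint(0, 2, size=80)]  # one-hot source labels
w = adapter.kernel_mean_matching(X_target, Z_source, y_onehot,
                                 kern='laplacian', B0=5.0, B1=10.0)
print(w.shape)  # (80, 1): one importance weight per source sample
# e.g. SVC().fit(Z_source, y_source, sample_weight=w.ravel())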