def centering(trTotalExt=7479):
    """Check the stored centered test kernel against a CPU reference.

    Reads the train and test samples from the ``rodrech`` work directory,
    rebuilds the Gaussian test kernel with NumPy, centers it with
    ``mlpy.kernel_center`` against the transposed contents of ``Kx.bin``
    and compares the result with ``t_kern_centr.bin``.

    Args:
        trTotalExt: Second argument handed to the reader when loading
            ``Kx.bin`` (presumably the row width of the stored matrix --
            confirm against ``file_reader``).  Defaults to 7479, the value
            that used to be hard-coded; 3834 was noted as the alternative.

    Raises:
        AssertionError: If the two centered kernels differ by more than
            ``atol=1e-5``.
    """
    workdir = os.path.join("tkernel_new", "tkernel_new", "rodrech")
    config = read_config()
    freader = file_reader(config["dt"], workdir)

    train = freader("train.bin", config["dim"])
    nTrain = train.shape[0]
    test = freader("data.bin", config["dim"])
    sigma = config["sigma"]

    def gauss(a, b):
        # Gaussian kernel value for one pair of samples.
        diff = a - b
        return np.exp(-0.5 * np.dot(diff, diff) / sigma ** 2)

    # One row per test sample, one column per training sample.
    tKernEstimed = np.array([[gauss(trEl, testEl) for trEl in train]
                             for testEl in test])

    Kx = freader("Kx.bin", trTotalExt)
    tKernEstimedCent = mlpy.kernel_center(tKernEstimed, Kx.T)

    gpuTKernCent = freader("t_kern_centr.bin", nTrain)
    assert np.allclose(tKernEstimedCent, gpuTKernCent, atol=1e-5), \
        "centered test kernel differs from t_kern_centr.bin (atol=1e-5)"
def estim_kbasis(self, trData):
    """Estimate the eigen-basis of the centered training kernel matrix.

    The kernel matrix of ``trData`` is built with ``self.kernel_func``,
    centered with ``mlpy.kernel_center`` and eigen-decomposed.  Each
    eigenvector is scaled by ``1 / sqrt(eigenvalue)`` and the result is
    stored column-wise in ``self.vecs``, largest eigenvalue first.

    Args:
        trData: Re-iterable collection of training samples; each sample is
            converted with ``np.array`` before it reaches the kernel.

    Side effects:
        Sets ``self.trData`` and ``self.vecs`` and prints a normalisation
        check for the leading component.
    """
    self.trData = trData
    Kx = np.mat([[self.kernel_func(np.array(xi), np.array(xj))
                  for xj in self.trData] for xi in self.trData])
    Kx = np.asarray(mlpy.kernel_center(Kx, Kx))

    # The centered kernel matrix is symmetric up to round-off.  eigh gives
    # real eigenpairs for it, whereas eig may return complex values in
    # arbitrary order.
    Kx = 0.5 * (Kx + Kx.T)
    vals, vecs = np.linalg.eigh(Kx)

    # eigh sorts ascending; callers take the first k columns as the leading
    # components, so flip to descending order.
    order = np.argsort(vals)[::-1]
    vals = vals[order]
    vecs = vecs[:, order]

    # Centering leaves at least one (numerically) zero eigenvalue; dividing
    # by its square root would yield inf/nan, so those columns are zeroed.
    tol = Kx.shape[0] * np.finfo(float).eps * max(vals[0], 0.0)
    scale = np.zeros(len(vals))
    usable = vals > tol
    scale[usable] = 1.0 / np.sqrt(vals[usable])

    self.vecs = np.mat(vecs * scale)
    print('mult: %s' % (vals[0] * self.vecs[:, 0].T * self.vecs[:, 0],))
def draw_data(data, clabs, kernel_func, testData):
    """Plot the data next to its projection on two kernel components.

    Left panel: the first two columns of ``data`` coloured by ``clabs``
    with ``testData`` overlaid.  Right panel: both sets projected on the
    two leading eigenvectors of the centered training kernel matrix.

    Args:
        data: 2-D array of training samples (indexed as ``data[:, 0]``).
        clabs: Colour argument forwarded to ``plt.scatter`` for ``data``.
        kernel_func: Callable taking two samples and returning a scalar.
        testData: 2-D array of samples to overlay on both panels.
    """
    plt.figure(1)
    print(data.shape)

    plt.subplot(121)
    plt.scatter(data[:, 0], data[:, 1], c=clabs)
    plt.scatter(testData[:, 0], testData[:, 1])

    # Keep the uncentered matrix: it is needed again to center the test
    # kernel further down.
    KxRaw = np.mat([[kernel_func(xi, xj) for xj in data] for xi in data])
    Kx = np.mat(mlpy.kernel_center(KxRaw, KxRaw))
    print("KxCopy == Kx %s" % (np.allclose(KxRaw, Kx),))

    # eigh returns real eigenpairs for the symmetric centered kernel; eig
    # gives them unordered and possibly complex.
    sym = 0.5 * (np.asarray(Kx) + np.asarray(Kx).T)
    vals, vecs = np.linalg.eigh(sym)
    order = np.argsort(vals)[::-1]  # largest eigenvalue first
    vals = vals[order]
    vecs = np.mat(vecs[:, order])
    print("mult before normalization %s"
          % (vals[0] * vecs[:, 0].T * vecs[:, 0],))

    # Scale each eigenvector by 1/sqrt(eigenvalue); columns belonging to
    # zero or negative eigenvalues are zeroed instead of becoming inf/nan.
    tol = sym.shape[0] * np.finfo(float).eps * max(vals[0], 0.0)
    scale = np.zeros(len(vals))
    usable = vals > tol
    scale[usable] = 1.0 / np.sqrt(vals[usable])
    vecs = np.mat(np.asarray(vecs) * scale)
    print('mult: %s' % (vals[0] * vecs[:, 0].T * vecs[:, 0],))

    data_k_trans = np.array(Kx.T * vecs[:, :2])
    plt.subplot(122)
    plt.scatter(data_k_trans[:, 0], data_k_trans[:, 1], c=clabs)

    Kt = np.mat([[kernel_func(xi, xj) for xj in data] for xi in testData])
    Kt = np.mat(mlpy.kernel_center(Kt, KxRaw))
    kTransTestData = np.array(Kt * vecs[:, :2])
    plt.scatter(kTransTestData[:, 0], kTransTestData[:, 1])
    plt.show()
def estim_kbasis(self, trData, labels):
    """Estimate a label-aware kernel basis from a block eigenproblem.

    Builds the centered training kernel matrix ``Kx`` and the label matrix
    ``Y = one_of_c(labels)``, then eigen-decomposes the block matrix
    ``[[0, Kx*Y], [Y.T*Kx, 0]]``.  From the top-left ``n x n`` block of the
    eigenvector matrix (``n = Kx.shape[0]``) every second column, starting
    at index 1, is kept and scaled by ``sqrt(2) / sqrt(eigenvalue)``.

    Args:
        trData: Re-iterable collection of training samples.
        labels: Labels passed unchanged to ``one_of_c``.

    Side effects:
        Sets ``self.trData`` and ``self.vecs`` and prints a normalisation
        check for the first kept component.
    """
    self.trData = trData
    Y = one_of_c(labels)
    Kx = np.mat([[self.kernel_func(np.array(xi), np.array(xj))
                  for xj in self.trData] for xi in self.trData])
    Kx = np.mat(mlpy.kernel_center(Kx, Kx))

    n = Kx.shape[0]
    n_out = Y.shape[1]
    matForEig = np.mat(np.vstack([
        np.hstack([np.zeros(Kx.shape), Kx * Y]),
        np.hstack([Y.T * Kx, np.zeros((n_out, n_out))]),
    ]))
    vals, vecs = np.linalg.eig(matForEig)

    # NOTE(review): keeping the odd-indexed eigenpairs presumably relies on
    # eig returning the eigenvalues in adjacent pairs with the wanted one
    # second.  np.linalg.eig documents no ordering -- confirm before
    # relying on it.
    vals = np.real(vals[:n][1::2])
    # A strided slice picks columns 1, 3, 5, ... in one step instead of
    # growing the matrix with repeated hstack calls.
    kept = np.mat(vecs[:n, :n][:, 1::2])

    norm_mat = np.mat(np.diag(np.sqrt(2) / np.sqrt(vals)))
    self.vecs = kept * norm_mat
    print('mult: %s' % (vals[0] * self.vecs[:, 0].T * self.vecs[:, 0],))
def transform(self, data, k=2):
    """Project *data* on the first *k* columns of the stored basis.

    The kernel matrix between ``data`` and ``self.trData`` is centered
    with ``mlpy.kernel_center`` against the (uncentered) training kernel
    matrix and multiplied by ``self.vecs[:, :k]``.

    Args:
        data: Iterable of samples to project.
        k: Number of leading basis columns to use.

    Returns:
        ``numpy.ndarray`` holding the real part of the projection, one row
        per sample in ``data``.
    """
    def kernel_rows(samples):
        # Kernel values of every sample against every training sample.
        rows = []
        for sample in samples:
            rows.append([self.kernel_func(np.array(sample), np.array(ref))
                         for ref in self.trData])
        return np.mat(rows)

    train_gram = kernel_rows(self.trData)
    test_gram = kernel_rows(data)
    centered = np.mat(mlpy.kernel_center(test_gram, train_gram))

    projected = np.real(centered * self.vecs[:, :k])
    return np.array(projected)
def estim_kbasis(self, trData, labels, regParam, verbose=True):
    """Estimate a basis from a random training subset via a generalized eigenproblem.

    A random subset of ``regParam`` distinct training samples is drawn.
    The kernel matrix between that subset and all of ``trData`` is built,
    stored uncentered in ``self.Kx`` and then centered.  With ``Ky`` the
    centered ``Y * Y.T`` (``Y = one_of_c(labels)``), the generalized
    problem ``eigh(Kx*Ky*Kx.T, Kx*Kx.T + ridge*I)`` is solved.  Each
    eigenvector ``a`` is rescaled so that ``(a.T*Kx)*(Kx.T*a) == 1``.

    Args:
        trData: Indexable, re-iterable collection of training samples.
        labels: Labels passed unchanged to ``one_of_c``.
        regParam: Number of distinct samples to draw for the subset.
        verbose: When true, print progress and diagnostic information.

    Raises:
        ValueError: If ``regParam`` exceeds ``len(trData)``; that many
            distinct indices cannot be drawn, and the sampling loop would
            otherwise never terminate.

    Side effects:
        Sets ``self.trData``, ``self.trDataSubset``, ``self.Kx`` and
        ``self.vecs`` (eigenvectors in the order returned by ``eigh``).
    """
    self.trData = trData

    n_samples = len(trData)
    if regParam > n_samples:
        raise ValueError(
            "regParam (%s) exceeds the number of training samples (%s)"
            % (regParam, n_samples))
    chosen = set()
    while len(chosen) < regParam:
        # random.uniform may return its upper end point, so clamp the
        # index to the last valid position.
        chosen.add(min(int(random.uniform(0, n_samples)), n_samples - 1))
    subSetInds = sorted(chosen)
    if verbose:
        print("The sub set of indecies for reqularization has been selected")
    self.trDataSubset = np.array([trData[i] for i in subSetInds])

    Y = one_of_c(labels)
    Kx = np.mat([[self.kernel_func(np.array(xi), np.array(xj))
                  for xj in self.trData] for xi in self.trDataSubset])
    self.Kx = Kx
    if verbose:
        print("Requralized kernel matrix has been estimated!")
        print("The shape of KxReg is %s" % (self.Kx.shape,))

    Kx = np.mat(mlpy.kernel_center(Kx, Kx))
    Ky = Y * Y.T
    Ky = np.mat(mlpy.kernel_center(Ky, Ky))

    # Ridge term: Kx*Kx.T is positive semi-definite, so its smallest
    # eigenvalue is round-off noise of either sign.  Using the magnitude
    # always shifts the spectrum upwards; subtracting a positive value
    # would make the matrix indefinite and break the solver below.
    KxKxT = Kx * Kx.T
    meval = min(np.real(np.linalg.eigvals(KxKxT)))
    KxKxT = KxKxT + 1e4 * abs(meval) * np.identity(Kx.shape[0])
    if verbose:
        print("Valitidy Check 1: KxKxT is simmetric: %s"
              % (np.allclose(KxKxT, KxKxT.T),))
        print("Validity Check 2: KxKxT is positive definite: %s"
              % (np.all(np.linalg.eigvals(KxKxT) > 0),))

    vals, vecs = eigh(Kx * Ky * Kx.T, KxKxT)
    lambdas = np.array([np.real(v) for v in vals])
    alphas = np.mat(vecs)
    if verbose:
        print(lambdas)
        print("%s %s" % (type(lambdas), type(alphas)))

    # Rescale every eigenvector a so that (a.T*Kx)*(Kx.T*a) equals 1.
    scales = []
    for col in range(alphas.shape[1]):
        a = alphas[:, col]
        scales.append(1.0 / np.sqrt(((a.T * Kx) * (Kx.T * a))[0, 0]))
    alphas = alphas * np.mat(np.diag(scales))

    if verbose and alphas.shape[1]:
        # Spot-check one column; index 44 is used when it exists, otherwise
        # the last available column.
        probe = np.mat(alphas[:, min(44, alphas.shape[1] - 1)])
        print('mult after norm: %s' % ((probe.T * Kx) * (Kx.T * probe),))
    self.vecs = alphas
def transform(self, data):
    """Project *data* with the stored eigenvectors.

    Evaluates a Gaussian kernel of width ``self.sigma`` between every
    sample in ``data`` and every sample in ``self.train``, centers the
    result with ``mlpy.kernel_center`` against ``self.Kx.T`` and
    multiplies it by ``self.eigvecs``.

    Args:
        data: Iterable of samples supporting subtraction with the samples
            in ``self.train``.

    Returns:
        The product of the centered kernel matrix and ``self.eigvecs``.
    """
    def gauss(train_sample, sample):
        # Gaussian kernel value for one (training sample, sample) pair.
        diff = train_sample - sample
        return np.exp(-0.5 * np.dot(diff, diff) / self.sigma ** 2)

    rows = []
    for el in data:
        rows.append([gauss(trEl, el) for trEl in self.train])
    kernel = np.array(rows)

    centered = mlpy.kernel_center(kernel, self.Kx.T)
    return np.dot(centered, self.eigvecs)