def KRR(): np.random.seed(10) targetValues = np.genfromtxt("F:\PY\data\stock.txt", skip_header=1, dtype=None, delimiter="\t", usecols=(1)) mse = {} for i in range(20, 50): trainingPoints = np.arange(i - 1).reshape(-1, 1) testPoints = np.arange(i).reshape(-1, 1) # training kernel matrix knl = mlpy.kernel_gaussian(trainingPoints, trainingPoints, sigma=1) # testing kernel matrix knlTest = mlpy.kernel_gaussian(testPoints, trainingPoints, sigma=1) knlRidge = KernelRidge(lmb=0.01, kernel=None) err = 0 for j in range(1, 6): knlRidge.learn(knl, targetValues[-(i + j - 1) : -j]) resultPoints = knlRidge.pred(knlTest) err += (resultPoints[-1] - targetValues[-j]) ** 2 print i, resultPoints[-1], targetValues[-j] mse[i] = square(err / 5) print mse[i] sortedMse = sorted(mse.iteritems(), key=operator.itemgetter(1), reverse=False) print sortedMse[0]
def KRRTest():
    np.random.seed(10)
    targetValues = np.genfromtxt(r'F:\PY\data\stock.txt', skip_header=1,
                                 dtype=None, delimiter='\t', usecols=(1))
    trainingPoints = np.arange(46).reshape(-1, 1)
    testPoints = np.arange(47).reshape(-1, 1)
    # training kernel matrix
    knl = mlpy.kernel_gaussian(trainingPoints, trainingPoints, sigma=1)
    # testing kernel matrix
    knlTest = mlpy.kernel_gaussian(testPoints, trainingPoints, sigma=1)
    knlRidge = KernelRidge(lmb=0.01, kernel=None)
    knlRidge.learn(knl, targetValues[-46:])
    resultPoints = knlRidge.pred(knlTest)
    print(targetValues.size, resultPoints)
    fig = plt.figure(1)
    plot1 = plt.plot(trainingPoints, targetValues[-46:], 'o')
    plot2 = plt.plot(testPoints, resultPoints)
    plt.show()
def SmoothKRR():
    y = np.genfromtxt(r'F:\PY\data\Gold.csv', skip_header=1,
                      dtype=None, delimiter=',', usecols=(1))
    targetValues = smooth(y, len(y))
    np.random.seed(10)
    trainingPoints = np.arange(125).reshape(-1, 1)
    testPoints = np.arange(126).reshape(-1, 1)
    knl = mlpy.kernel_gaussian(trainingPoints, trainingPoints, sigma=1)
    knlTest = mlpy.kernel_gaussian(testPoints, trainingPoints, sigma=1)
    knlRidge = mlpy.KernelRidge(lmb=0.01, kernel=None)
    knlRidge.learn(knl, targetValues)
    resultPoints = knlRidge.pred(knlTest)
    print(resultPoints)
    plt.step(trainingPoints, targetValues, 'o')
    plt.step(testPoints, resultPoints)
    plt.show()
def KRR():
    np.random.seed(10)
    targetValues = np.genfromtxt(r'F:\PY\data\Gold.csv', skip_header=1,
                                 dtype=None, delimiter=',', usecols=(1))
    trainingPoints = np.arange(125).reshape(-1, 1)
    testPoints = np.arange(126).reshape(-1, 1)
    # training kernel matrix
    knl = mlpy.kernel_gaussian(trainingPoints, trainingPoints, sigma=1)
    # testing kernel matrix
    knlTest = mlpy.kernel_gaussian(testPoints, trainingPoints, sigma=1)
    knlRidge = KernelRidge(lmb=0.01, kernel=None)
    knlRidge.learn(knl, targetValues)
    resultPoints = knlRidge.pred(knlTest)
    print(resultPoints)
    fig = plt.figure(1)
    plot1 = plt.plot(trainingPoints, targetValues, 'o')
    plot2 = plt.plot(testPoints, resultPoints)
    plt.show()
def SmoothKRR():
    y = np.genfromtxt(r'F:\PY\data\stock.txt', skip_header=1,
                      dtype=None, delimiter='\t', usecols=(1))
    targetValues = smooth(y, len(y))
    np.random.seed(10)
    trainingPoints = np.arange(28).reshape(-1, 1)
    testPoints = np.arange(29).reshape(-1, 1)
    knl = mlpy.kernel_gaussian(trainingPoints, trainingPoints, sigma=1)
    knlTest = mlpy.kernel_gaussian(testPoints, trainingPoints, sigma=1)
    knlRidge = mlpy.KernelRidge(lmb=0.01, kernel=None)
    knlRidge.learn(knl, targetValues)
    resultPoints = knlRidge.pred(knlTest)
    print(resultPoints)
    plt.step(trainingPoints, targetValues, 'o')
    plt.step(testPoints, resultPoints)
    plt.show()
def klda(X, y):
    class_indices = []
    class_indices.append(np.where(y == -1))  # at pos 0
    class_indices.append(np.where(y == 1))   # at pos 1

    # dimensions
    n = X.shape[0]
    d = X.shape[1]
    n1 = X[class_indices[0]].shape[0]
    n2 = X[class_indices[1]].shape[0]
    K = X.dot(X.T)

    # kernel matrices between all samples and each class: K1 is n x n1, K2 is n x n2
    K1 = mlpy.kernel_gaussian(X, X[class_indices[0]], sigma=15)
    K2 = mlpy.kernel_gaussian(X, X[class_indices[1]], sigma=15)

    # class mean vectors M1 and M2, averaged over each class
    M1 = np.zeros((n, 1))
    class_no = 0
    for i, xi in enumerate(X):
        M1[i] = np.sum([[xi.dot(xj.T)] for xj in X[class_indices[class_no]]]) / n1
    M2 = np.zeros((n, 1))
    class_no = 1
    for i, xi in enumerate(X):
        M2[i] = np.sum([[xi.dot(xj.T)] for xj in X[class_indices[class_no]]]) / n2

    # between-class scatter matrix
    M = (M2 - M1).dot((M2 - M1).T)

    # within-class scatter matrix; the centering term is the n_j x n_j matrix
    # with all entries 1/n_j (the original used np.ones(n_j) * n_j, which
    # inverts that factor)
    N1 = (K1.dot(np.identity(n1) - np.ones((n1, n1)) / n1)).dot(K1.T)
    N2 = (K2.dot(np.identity(n2) - np.ones((n2, n2)) / n2)).dot(K2.T)
    N = N1 + N2 + 1e-8 * np.identity(n)  # small ridge so N is invertible

    eig_vals, eig_vecs = np.linalg.eig(np.linalg.inv(N).dot(M))

    # Make a list of (eigenvalue, eigenvector) tuples, sorted from high to low
    eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:, i]) for i in range(len(eig_vals))]
    eig_pairs = sorted(eig_pairs, key=lambda k: k[0], reverse=True)

    # Projection coefficients: the leading eigenvector
    W = eig_pairs[0][1]

    ldaX = []
    for j in range(len(X)):
        ldaX.append(sum([W[i] * mlpy.kernel_gaussian(X[j], X[i], sigma=15)
                         for i in range(n)]))
    return ldaX
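A minimal usage sketch for the hand-rolled klda above (not from the original source). The two-blob data, seed, and class sizes are hypothetical; the function expects labels in {-1, +1}:

import numpy as np
import mlpy

np.random.seed(0)
# hypothetical two-class data, labelled -1 and +1 as klda expects
Xa = np.random.multivariate_normal([0, 0], np.eye(2), 15)
Xb = np.random.multivariate_normal([4, 4], np.eye(2), 15)
X = np.vstack((Xa, Xb))
y = np.array([-1] * 15 + [1] * 15)

ldaX = klda(X, y)
print(len(ldaX))  # 30 -- one projected value per sample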
def f(TrainIn, TrainOut, TestIn):
    print("init......")
    x = numpy.array(TrainIn)
    y = numpy.array(TrainOut)
    t = numpy.array(TestIn)
    print("learn......")
    k = mlpy.kernel_gaussian(x, x, sigma=1)
    kt = mlpy.kernel_gaussian(t, x, sigma=1)
    krr = mlpy.KernelRidge(lmb=0.01)
    krr.learn(k, y)
    print("out......")
    re = krr.pred(kt)  # predict on the test kernel matrix (the original passed raw t)
    return re
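A short usage sketch for the f wrapper above, assuming numpy and mlpy are importable as in the snippet; the sine-curve data is invented for illustration:

import numpy
import mlpy

# hypothetical regression task: fit y = sin(x) on 50 points, predict 20 new ones
TrainIn = numpy.linspace(0, 6, 50).reshape(-1, 1)
TrainOut = numpy.sin(TrainIn).ravel()
TestIn = numpy.linspace(0, 6, 20).reshape(-1, 1)

predictions = f(TrainIn, TrainOut, TestIn)
print(predictions.shape)  # (20,) -- one prediction per test point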
def metric(self):
    totalTimer = Timer()
    with totalTimer:
        if self.method_param["kernel"] == "polynomial":
            if "degree" in self.method_param:
                degree = int(self.method_param["degree"])
            else:
                degree = 1
            kernel = mlpy.kernel_polynomial(self.data[0], self.data[0], d=degree)
        elif self.method_param["kernel"] == "gaussian":
            kernel = mlpy.kernel_gaussian(self.data[0], self.data[0], sigma=2)
        elif self.method_param["kernel"] == "linear":
            kernel = mlpy.kernel_linear(self.data[0], self.data[0])
        elif self.method_param["kernel"] == "hyptan":
            kernel = mlpy.kernel_sigmoid(self.data[0], self.data[0])

        model = mlpy.KPCA()
        model.learn(kernel)
        out = model.transform(kernel, **self.build_opts)

    metric = {}
    metric["runtime"] = totalTimer.ElapsedTime()
    return metric
def RunKPCAMlpy():
    totalTimer = Timer()

    # Load input dataset.
    Log.Info("Loading dataset", self.verbose)
    data = np.genfromtxt(self.dataset, delimiter=',')

    try:
        with totalTimer:
            # Get the new dimensionality, if it is necessary.
            if "new_dimensionality" in options:
                d = int(options.pop("new_dimensionality"))
                if (d > data.shape[1]):
                    Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater "
                              + "than existing dimensionality (" + str(data.shape[1]) + ")!")
                    return -1
            else:
                d = data.shape[0]

            # Get the kernel type and make sure it is valid.
            if not "kernel" in options:
                Log.Fatal("Choose kernel type, valid choices are 'polynomial', " +
                          "'gaussian', 'linear' and 'hyptan'.")
                return -1

            if options["kernel"] == "polynomial":
                if "degree" in options:
                    degree = int(options.pop("degree"))
                else:
                    degree = 1
                kernel = mlpy.kernel_polynomial(data, data, d=degree)
            elif options["kernel"] == "gaussian":
                kernel = mlpy.kernel_gaussian(data, data, sigma=2)
            elif options["kernel"] == "linear":
                kernel = mlpy.kernel_linear(data, data)
            elif options["kernel"] == "hyptan":
                kernel = mlpy.kernel_sigmoid(data, data)
            else:
                # report the invalid name from the options dict (the original
                # referenced kernel.group(1), which is undefined here)
                Log.Fatal("Invalid kernel type (" + str(options["kernel"]) + "); valid "
                          + "choices are 'polynomial', 'gaussian', 'linear' and 'hyptan'.")
                return -1
            options.pop("kernel")

            if len(options) > 0:
                Log.Fatal("Unknown parameters: " + str(options))
                raise Exception("unknown parameters")

            # Perform Kernel Principal Components Analysis.
            model = mlpy.KPCA()
            model.learn(kernel)
            out = model.transform(kernel, k=d)
    except Exception as e:
        Log.Fatal("Exception: " + str(e))
        return -1

    return totalTimer.ElapsedTime()
def RunKPCAMlpy(q):
    totalTimer = Timer()

    # Load input dataset.
    Log.Info("Loading dataset", self.verbose)
    data = np.genfromtxt(self.dataset, delimiter=',')

    try:
        with totalTimer:
            # Get the new dimensionality, if it is necessary.
            dimension = re.search(r'-d (\d+)', options)
            if not dimension:
                d = data.shape[0]
            else:
                d = int(dimension.group(1))
                if (d > data.shape[1]):
                    Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater "
                              + "than existing dimensionality (" + str(data.shape[1]) + ")!")
                    q.put(-1)
                    return -1

            # Get the kernel type and make sure it is valid.
            kernel = re.search(r"-k ([^\s]+)", options)
            if not kernel:
                Log.Fatal("Choose kernel type, valid choices are 'polynomial', " +
                          "'gaussian', 'linear' and 'hyptan'.")
                q.put(-1)
                return -1
            elif kernel.group(1) == "polynomial":
                degree = re.search(r'-D (\d+)', options)
                degree = 1 if not degree else int(degree.group(1))
                kernel = mlpy.kernel_polynomial(data, data, d=degree)
            elif kernel.group(1) == "gaussian":
                kernel = mlpy.kernel_gaussian(data, data, sigma=2)
            elif kernel.group(1) == "linear":
                kernel = mlpy.kernel_linear(data, data)
            elif kernel.group(1) == "hyptan":
                kernel = mlpy.kernel_sigmoid(data, data)
            else:
                Log.Fatal("Invalid kernel type (" + kernel.group(1) + "); valid " +
                          "choices are 'polynomial', 'gaussian', 'linear' and 'hyptan'.")
                q.put(-1)
                return -1

            # Perform Kernel Principal Components Analysis.
            model = mlpy.KPCA()
            model.learn(kernel)
            out = model.transform(kernel, k=d)
    except Exception as e:
        q.put(-1)
        return -1

    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
def KRRProTy():
    np.random.seed(10)
    targetValues = np.genfromtxt(r"F:\PY\data\stockPT.txt", skip_header=1,
                                 dtype=None, delimiter="\t", usecols=(1))
    trainingPoints = np.arange(targetValues.size).reshape(-1, 1)
    testPoints = np.arange(targetValues.size + 1).reshape(-1, 1)
    # training kernel matrix
    knl = mlpy.kernel_gaussian(trainingPoints, trainingPoints, sigma=1)
    # testing kernel matrix
    knlTest = mlpy.kernel_gaussian(testPoints, trainingPoints, sigma=1)
    knlRidge = KernelRidge(lmb=0.01, kernel=None)
    knlRidge.learn(knl, targetValues)
    resultPoints = knlRidge.pred(knlTest)
    print(targetValues.size, resultPoints)
    fig = plt.figure(1)
    plot1 = plt.plot(trainingPoints, targetValues, "o")
    plot2 = plt.plot(testPoints, resultPoints)
    plt.show()
def draw_mlpy_example(data, clabs, testData):
    gK = mlpy.kernel_gaussian(data, data, sigma=2)  # gaussian kernel matrix (unused below)
    gaussian_pca = mlpy.KPCA(mlpy.KernelGaussian(2.0))
    gaussian_pca.learn(data)
    gz = gaussian_pca.transform(data, k=2)

    fig = plt.figure(1)
    ax1 = plt.subplot(121)
    plot1 = plt.scatter(data[:, 0], data[:, 1], c=clabs)
    plot1_5 = plt.scatter(testData[:, 0], testData[:, 1])
    title1 = ax1.set_title('Original data')

    trTestData = gaussian_pca.transform(testData, k=2)
    ax2 = plt.subplot(122)
    plot2 = plt.scatter(gz[:, 0], gz[:, 1], c=clabs)
    plot2_5 = plt.scatter(trTestData[:, 0], trTestData[:, 1])
    title2 = ax2.set_title('Gaussian kernel')
    plt.show()
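A hedged usage sketch for draw_mlpy_example above; the blob data, labels, and test points are illustrative assumptions, not from the original:

import numpy as np

np.random.seed(0)
# hypothetical two-class data plus a few unlabeled test points
data = np.vstack((np.random.multivariate_normal([0, 0], np.eye(2), 25),
                  np.random.multivariate_normal([3, 3], np.eye(2), 25)))
clabs = np.array([0] * 25 + [1] * 25)
testData = np.random.multivariate_normal([1.5, 1.5], np.eye(2), 5)

draw_mlpy_example(data, clabs, testData)  # left panel: input space; right: KPCA space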
import numpy as np
import matplotlib.pyplot as plt
import mlpy

np.random.seed(0)
x = np.arange(0, 2, 0.05).reshape(-1, 1)  # training points
y = np.ravel(np.exp(x)) + np.random.normal(1, 0.2, x.shape[0])  # target values
xt = np.arange(0, 2, 0.01).reshape(-1, 1)  # testing points
K = mlpy.kernel_gaussian(x, x, sigma=1)  # training kernel matrix
Kt = mlpy.kernel_gaussian(xt, x, sigma=1)  # testing kernel matrix
krr = mlpy.KernelRidge(lmb=0.01)
krr.learn(K, y)
yt = krr.pred(Kt)
fig = plt.figure(1)
plot1 = plt.plot(x[:, 0], y, 'o')
plot2 = plt.plot(xt[:, 0], yt)
plt.show()
""" Confidence level generated from the histogram data """ (con,z_h)=confid_inter.hist_conf_interval(x,y,XHist,YHist) """ Random confidence level """ rcon=confid_inter.conf_interval(len(x),5) """-------Select the histogram confidence level------- """ inter=con """-------Add a third feature of confidence interval to x&y------- """ zc=column_stack((x,y,inter)) z=zc # Adds confidence value to z ######################################################################## """-------Implement kernel PCA with gaussian kernel------- """ xG = mlpy.kernel_gaussian(z, z, sigma=2) # gaussian kernel matrix gaussian_pca = mlpy.KPCA() gaussian_pca.learn(xG) xg = gaussian_pca.transform(xG, k=2) ######################################################################## """-------Implement kernel PCA with polynomial kernel------- """ xP= mlpy.kernel_polynomial(z,z, gamma=1.0, b=1.0, d=2.0) # polynomial kernel matrix polynomial_pca = mlpy.KPCA() polynomial_pca.learn(xP) xp = polynomial_pca.transform(xP, k=3) print 'Implementing Kernel PCA' ########################################################################
def klda(X, y, img_f):
    """
    Function to reduce the data and give the reduced transformation matrix
    X - The original dimensional data
    y - The labels
    @returns the dimensionally reduced data
    """
    k = len(np.unique(y))
    # Calculate the number of entries for each class
    _, Ns = np.unique(y, return_counts=True)
    N, m = X.shape

    # Obtain all the indices that contain each class separately
    class_indices = []
    for c in np.unique(y):
        class_indices.append(np.where(y == c))

    # Calculate the Gram matrix after the kernel trick
    G = mlpy.kernel_gaussian(X, X, sigma=2.0)
    # print G.shape

    # Separate the k classes into k different matrices.
    # Each entry in c_list is N x nk (assumes samples are sorted by class).
    c_list = []
    te = 0
    for i in range(k):
        c_temp = G[:, te:te + Ns[i]]
        te += Ns[i]
        c_list.append(c_temp)

    # Initialize the between-class and within-class scatter matrices
    sb = np.zeros([N, N], np.float32)
    sw = np.zeros([N, N], np.float32)

    # Calculate the mean of each class; each mean vector is N x 1
    means = []
    for i in range(k):
        ci = np.sum(c_list[i], 1) / Ns[i]
        ci = np.reshape(ci, (N, 1))
        means.append(ci)

    # Calculate the mean of means, also an N x 1 vector
    mean_overall = np.zeros((N, 1), np.float32)
    for meani in means:
        mean_overall += meani
    mean_overall /= k

    # Calculate sb
    for i in range(k):
        sb += Ns[i] * np.matmul((means[i] - mean_overall),
                                (means[i] - mean_overall).T)

    # Calculate sw (reshape each column to N x 1 so the outer product is N x N;
    # the original subtracted a (N,) vector from a (N,1) one, which broadcasts)
    for j in range(k):
        for i in range(Ns[j]):
            cji = np.reshape(c_list[j][:, i], (N, 1))
            sw += np.matmul(cji - means[j], (cji - means[j]).T)

    # Calculate the eigenvalues and sorted eigenvectors of sw_inv_sb
    sw_inv_sb = np.matmul(np.linalg.pinv(sw), sb)
    eig_vals, eig_vecs = np.linalg.eig(sw_inv_sb)
    indices = np.argsort(eig_vals)[::-1]
    plot_eigs(eig_vals, indices, img_f)

    # Reduce the data: choose the dimension to reduce to after
    # analyzing the plot of eigenvalues
    to_red = 4
    indices = indices[:to_red]
    eig_vecs = eig_vecs[:, indices]  # eigenvectors are the columns of eig_vecs
    W = np.reshape(eig_vecs[:, 0], (N, 1))
    for i in range(1, to_red):
        W = np.concatenate((W, np.reshape(eig_vecs[:, i], (N, 1))), axis=1)
    # print W.shape

    return np.matmul(W.T, G)
source = []
target = []
for s in sentences:
    words = []
    nwords = []
    for k in range(len(s) - 1):
        words.append(lut[s[k]])
        nwords.append(lut[s[k + 1]])
    source.append(np.r_[words])
    target.extend(np.r_[nwords])

# warm up the reservoir and record its final state
for b in source[:10]:
    reservoir.execute(b)
initial_state = reservoir.states[-1]

# re-run every sequence from the same initial state and collect the states
states = []
for k in range(len(source)):
    reservoir.states = np.c_[initial_state].T
    states.append(reservoir.execute(source[k]))
X = np.vstack(states)

import mlpy

K = mlpy.kernel_gaussian(X.T, X.T, sigma=1)
readout = mlpy.KernelRidge(lmb=0.01)
readout.learn(K, np.array(target)[:, 0])
kernel_distrib = readout.pred(K)  # pred expects a kernel matrix (the original passed raw X)
import numpy as np
import os
from mlpy import KPCA, kernel_gaussian

# Load the data.
f = open(os.path.join(os.path.dirname(__file__), '../data/circle_data.txt'))
data = np.fromfile(f, dtype=np.float64, sep=' ')
data = data.reshape(-1, 2)
f.close()

# Perform Kernel PCA.
kernel = kernel_gaussian(data, data, sigma=2)  # gaussian kernel matrix
gaussian_pca = KPCA()
gaussian_pca.learn(kernel)
transformedData = gaussian_pca.transform(kernel, k=2)

# Show transformed data.
print(transformedData)
import numpy as np
import matplotlib.pyplot as plt
import mlpy

np.random.seed(0)
mean1, cov1, n1 = [1, 4.5], [[1, 1], [1, 2]], 20    # 20 samples of class 1
x1 = np.random.multivariate_normal(mean1, cov1, n1)
y1 = np.ones(n1, dtype=np.int)
mean2, cov2, n2 = [2.5, 2.5], [[1, 1], [1, 2]], 30  # 30 samples of class 2
x2 = np.random.multivariate_normal(mean2, cov2, n2)
y2 = 2 * np.ones(n2, dtype=np.int)
x = np.concatenate((x1, x2), axis=0)  # concatenate the samples
y = np.concatenate((y1, y2))
K = mlpy.kernel_gaussian(x, x, sigma=2)  # kernel matrix
xmin, xmax = x[:, 0].min() - 1, x[:, 0].max() + 1
ymin, ymax = x[:, 1].min() - 1, x[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(xmin, xmax, 0.02), np.arange(ymin, ymax, 0.02))
xt = np.c_[xx.ravel(), yy.ravel()]  # test points
Kt = mlpy.kernel_gaussian(xt, x, sigma=2)  # test kernel matrix
fig = plt.figure(1)
cmap = plt.set_cmap(plt.cm.Paired)
for i, c in enumerate([1, 10, 100, 1000]):
    ka = mlpy.KernelAdatron(C=c)
    ax = plt.subplot(2, 2, i + 1)
    ka.learn(K, y)
    ytest = ka.pred(Kt).reshape(xx.shape)
    title = ax.set_title('C: %s; margin: %.3f; steps: %s;'
                         % (c, ka.margin(), ka.steps()))
    plot1 = plt.pcolormesh(xx, yy, ytest)
    plot2 = plt.scatter(x[:, 0], x[:, 1], c=y)
plt.show()
alpha = np.linalg.solve(N, d)  # alpha
b = -np.dot(alpha, (n1 * m1 + n2 * m2) / float(n))

# ----- compare yoonsang's alpha with the package's alpha -----
import mlpy  # mlpy must be installed from the command line (pip, easy_install)

# Using a linear kernel
Kl = mlpy.kernel_linear(X, X)  # compute the kernel matrix
linear_kfda = mlpy.KFDA(lmb=0.001)
linear_kfda.learn(Kl, y)  # compute the transformation vector
zl = linear_kfda.transform(Kl)  # embed x into the kernel Fisher space

# Using a Gaussian kernel
sig = 1
Kg = mlpy.kernel_gaussian(X, X, sigma=sig)  # compute the kernel matrix
gaussian_kfda = mlpy.KFDA(lmb=0.001)
gaussian_kfda.learn(Kg, y)  # compute the transformation vector
zg = gaussian_kfda.transform(Kg)  # embed x into the kernel Fisher space
gaussian_kfda._coeff  # alpha

# Using a sigmoid kernel
gam = 0.1
Ks = mlpy.kernel_sigmoid(X, X, gamma=gam, b=1.0)  # compute the kernel matrix
sigmoid_kfda = mlpy.KFDA(lmb=0.001)
sigmoid_kfda.learn(Ks, y)  # compute the transformation vector
zs = sigmoid_kfda.transform(Ks)  # embed x into the kernel Fisher space
sigmoid_kfda._coeff

# Using a polynomial kernel
gam = 1.0
    # tail of the smooth() helper: windowed moving average via convolution
    y = np.convolve(w / w.sum(), s, mode='same')
    return y[window_len:-window_len + 1]


y = np.genfromtxt("Gold.csv", skip_header=1, dtype=None, delimiter=',', usecols=(1))
targetValues = smooth(y, len(y))
np.random.seed(10)
trainingPoints = np.arange(125).reshape(-1, 1)
testPoints = np.arange(126).reshape(-1, 1)
knl = mlpy.kernel_gaussian(trainingPoints, trainingPoints, sigma=1)
knlTest = mlpy.kernel_gaussian(testPoints, trainingPoints, sigma=1)
knlRidge = mlpy.KernelRidge(lmb=0.01, kernel=None)
knlRidge.learn(knl, targetValues)
resultPoints = knlRidge.pred(knlTest)
print(resultPoints)
plt.step(trainingPoints, targetValues, 'o')
plt.step(testPoints, resultPoints)
plt.show()
def klda(X, y):
    K = mlpy.kernel_gaussian(X, X, sigma=15)
    kfda = mlpy.KFDA(lmb=0.0)
    kfda.learn(K, y)  # compute the transformation vector
    z = kfda.transform(K)  # embed x into the kernel Fisher space
    return z
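A minimal usage sketch for the KFDA-based klda above; the binary-labelled blobs are invented for illustration (mlpy's KFDA is a two-class method):

import numpy as np
import mlpy

np.random.seed(0)
# hypothetical binary-labelled data
X = np.vstack((np.random.multivariate_normal([0, 0], np.eye(2), 20),
               np.random.multivariate_normal([4, 4], np.eye(2), 20)))
y = np.array([1] * 20 + [2] * 20)

z = klda(X, y)  # one coordinate per sample along the kernel Fisher direction
print(z.shape)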
def run_stack(SEED):
    model = "base"

    trainBase = csv_io_np.read_data("PreProcessData/train.csv",
                                    skipFirstLine=True, split=",")
    test = csv_io_np.read_data("PreProcessData/test.csv",
                               skipFirstLine=True, split=",")
    print("Data Read Complete")

    avg = 0
    NumFolds = 5

    predicted_list = []
    bootstrapLists = []

    # 100 produced 94%
    # 1000 did not finish in about 5+ hours...
    # 300 about 5 hours, .9691 on first CF
    # learn_rate=0.01, n_estimators=300, subsample=1.0, min_samples_split=30, 0.9386
    # GradientBoostingClassifier(loss='deviance', learn_rate=0.1, n_estimators=300,
    #     subsample=1.0, min_samples_split=30, min_samples_leaf=1, max_depth=5,
    #     init=None, random_state=None, max_features=None)
    clfs = [1]

    print("Data size: ", len(trainBase), len(test))
    dataset_blend_train = np.zeros((len(trainBase), len(clfs)))
    dataset_blend_test = np.zeros((len(test), len(clfs)))

    trainNew = []
    trainTestNew = []
    testNew = []
    trainNewSelect = []
    trainTestNewSelect = []
    testNewSelect = []

    print("Scaling")
    targetPre = [x[0] for x in trainBase]
    trainPre = [x[1:] for x in trainBase]
    testPre = [x[0:] for x in test]

    #print trainPre[0]
    #scaler = preprocessing.Scaler().fit(trainPre)
    #trainScaled = scaler.transform(trainPre)
    #testScaled = scaler.transform(testPre)
    trainScaled = trainPre
    testScaled = testPre
    #print scaler.mean_
    #print scaler.std_

    print("Begin Training")
    lenTrainBase = len(trainBase)
    trainBase = []
    lenTest = len(test)
    test = []
    trainPre = []
    testPre = []
    gc.collect()

    CC = [6]
    gg = [-6.36, -6.35, -6.34, -6.33, -6.32]

    for ExecutionIndex, clf in enumerate(clfs):
        print(clf)
        avg = 0
        predicted_list = []
        dataset_blend_test_set = np.zeros((lenTest, NumFolds))
        foldCount = 0
        avg = 0

        # Stratified for classification... [trainBase[i][0] for i in range(len(trainBase))]
        Folds = cross_validation.KFold(lenTrainBase, k=NumFolds, indices=True)
        for C in CC:
            for g in gg:
                for train_index, test_index in Folds:
                    print("g:", g, "C:", C)

                    #trainBaseTemp = [trainBase[i] for i in train_index]
                    #target = [x[0] for x in trainBaseTemp]
                    #train = [x[1:] for x in trainBaseTemp]
                    #testBaseTemp = [trainBase[i] for i in test_index]
                    #targetTest = [x[0] for x in testBaseTemp]
                    #trainTest = [x[1:] for x in testBaseTemp]
                    #test = [x[0:] for x in test]
                    target = [targetPre[i] for i in train_index]
                    train = [trainScaled[i] for i in train_index]
                    targetTest = [targetPre[i] for i in test_index]
                    trainTest = [trainScaled[i] for i in test_index]

                    print()
                    print("Iteration: ", foldCount)
                    print("LEN: ", len(train), len(train[0]), len(target),
                          len(trainTest), len(trainTest[0]))

                    # http://elf-project.sourceforge.net/ 26.9048
                    K = mlpy.kernel_gaussian(train, train, sigma=1.0)
                    clf = KernelRidge(lmb=6.62789e-08)  # also might need to set lambda
                    print(datetime.datetime.now())
                    clf.learn(K, target)
                    print(datetime.datetime.now())
                    Kt = mlpy.kernel_gaussian(trainTest, train, sigma=1)
                    prob = clf.pred(Kt)  # was krr.pred(Kt); krr is undefined here
                    print(datetime.datetime.now())

                    dataset_blend_train[test_index, ExecutionIndex] = prob

                    probSum = 0.0
                    count = 0.0
                    for i in range(0, len(prob)):
                        probX = prob[i]  # [1]
                        #print probX, targetTest[i]
                        if (targetTest[i] == probX):
                            probSum += 1.0
                        count = count + 1.0

                    print("Sum: ", probSum, count)
                    print("Score: ", probSum / count)
                    avg += (probSum / count) / NumFolds

                    #predicted_probs = clf.predict(testScaled)
                    ######predicted_list.append([x[1] for x in predicted_probs])
                    #dataset_blend_test_set[:, foldCount] = predicted_probs #[0]

                    foldCount = foldCount + 1
                    break

        #dataset_blend_test[:,ExecutionIndex] = dataset_blend_test_set.mean(1)

        now = datetime.datetime.now()
        #csv_io.write_delimited_file_single("../predictions/Stack_" +
        #    now.strftime("%Y%m%d%H%M%S") + "_" + str(avg) + "_" + str(clf)[:12]
        #    + ".csv", dataset_blend_test_set.mean(1))
        #csv_io.write_delimited_file_single("../predictions/Target_Stack_" +
        #    now.strftime("%Y%m%d%H%M%S") + "_" + str(avg) + "_" + str(clf)[:12]
        #    + ".csv", dataset_blend_train[:,ExecutionIndex])

        csv_io.write_delimited_file("../tune/TuneLog.csv",
                                    [now.strftime("%Y %m %d %H %M %S"), "Score:",
                                     str(avg * NumFolds), str(clf), "Folds:",
                                     str(NumFolds), "Model", model, "", ""],
                                    filemode="a", delimiter=",")

        #print "------------------------Average: ", avg

    return dataset_blend_train, dataset_blend_test