def classifier_gpbtsvm_modular(train_fname=traindat, test_fname=testdat, label_fname=label_traindat, width=2.1, C=1, epsilon=1e-5):
    """Train a GPBTSVM binary classifier with a Gaussian kernel on CSV data.

    Returns (predictions, svm, label array), or None when GPBTSVM is not
    compiled into the installed Shogun.
    """
    from modshogun import RealFeatures, BinaryLabels
    from modshogun import GaussianKernel
    from modshogun import CSVFile
    try:
        from modshogun import GPBTSVM
    except ImportError:
        print("GPBTSVM not available")
        # Bug fix: return instead of exit(0) — exiting from a library function
        # kills the calling interpreter/test harness; the sibling example
        # classifier_svmocas_modular returns on the same condition.
        return

    feats_train = RealFeatures(CSVFile(train_fname))
    feats_test = RealFeatures(CSVFile(test_fname))
    labels = BinaryLabels(CSVFile(label_fname))

    kernel = GaussianKernel(feats_train, feats_train, width)

    svm = GPBTSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    predictions = svm.apply(feats_test)
    return predictions, svm, predictions.get_labels()
def classifier_multiclassliblinear_modular(fm_train_real=traindat, fm_test_real=testdat, label_train_multiclass=label_traindat, label_test_multiclass=label_testdat, width=2.1, C=1, epsilon=1e-5):
    """Train a MulticlassLibLinear classifier and return test-set predictions.

    When test labels are supplied, multiclass accuracy is printed as well.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import MulticlassLibLinear

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    machine = MulticlassLibLinear(C, train_feats, train_labels)
    machine.train()

    predicted = machine.apply(test_feats)
    out = predicted.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        truth = MulticlassLabels(label_test_multiclass)
        acc = MulticlassAccuracy().evaluate(predicted, truth)
        print('Accuracy = %.4f' % acc)
    return out
def classifier_multiclass_shareboost(fm_train_real=traindat, fm_test_real=testdat, label_train_multiclass=label_traindat, label_test_multiclass=label_testdat, lawidth=2.1, C=1, epsilon=1e-5):
    """Train a ShareBoost multiclass learner with built-in feature selection.

    At most min(n_features - 1, 30) features are kept active; the test set is
    restricted to that active subset before prediction.
    """
    from modshogun import RealFeatures, RealSubsetFeatures, MulticlassLabels
    from modshogun import ShareBoost

    train_feats = RealFeatures(fm_train_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    active_count = min(fm_train_real.shape[0] - 1, 30)
    booster = ShareBoost(train_feats, train_labels, active_count)
    booster.train()

    # restrict test features to the subset selected during training
    test_feats = RealSubsetFeatures(RealFeatures(fm_test_real), booster.get_activeset())
    predicted = booster.apply(test_feats)
    out = predicted.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        truth = MulticlassLabels(label_test_multiclass)
        # computed for parity with the original example (accuracy not printed)
        acc = MulticlassAccuracy().evaluate(predicted, truth)
    return out
def regression_svrlight_modular(fm_train=traindat, fm_test=testdat, label_train=label_traindat, width=1.2, C=1, epsilon=1e-5, tube_epsilon=1e-2, num_threads=3):
    """Train SVRLight (support vector regression) with a Gaussian kernel.

    Returns (predicted labels, kernel), or None when SVRLight is not built in.
    """
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel
    try:
        from modshogun import SVRLight
    except ImportError:
        print('No support for SVRLight available.')
        return

    train_feats = RealFeatures(fm_train)
    test_feats = RealFeatures(fm_test)
    kernel = GaussianKernel(train_feats, train_feats, width)
    train_labels = RegressionLabels(label_train)

    svr = SVRLight(C, epsilon, kernel, train_labels)
    svr.set_tube_epsilon(tube_epsilon)
    svr.parallel.set_num_threads(num_threads)
    svr.train()

    # re-point the kernel at (train, test) before predicting
    kernel.init(train_feats, test_feats)
    out = svr.apply().get_labels()
    return out, kernel
def RunDTCShogun(q):
    """Time a decision-tree-classifier build/apply cycle; report via queue `q`.

    NOTE(review): `self` and `options` are free variables — this function only
    works as a closure inside a benchmark method that provides them; confirm
    against the enclosing class.
    """
    totalTimer = Timer()
    Log.Info("Loading dataset", self.verbose)
    trainData, labels = SplitTrainData(self.dataset)
    trainData = RealFeatures(trainData.T)
    labels = MulticlassLabels(labels)
    testData = RealFeatures(LoadDataset(self.dataset[1]).T)
    # This benchmark accepts no extra options; fail fast on leftovers.
    if len(options) > 0:
        Log.Fatal("Unknown parameters: " + str(options))
        raise Exception("unknown parameters")
    try:
        with totalTimer:
            self.model = self.BuildModel(trainData, labels, options)
            # Run the CARTree Classifier on the test dataset.
            self.model.apply_multiclass(testData).get_labels()
    except Exception as e:
        # Signal failure to the queue consumer as well as the caller.
        q.put(-1)
        return -1
    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
def RunNBCShogun():
    """Time Naive-Bayes model building/prediction; returns [time] or
    [time, predictions], or [-1] on any failure.

    NOTE(review): `self` and `options` are free variables — closure context;
    confirm against the enclosing benchmark class.
    """
    totalTimer = Timer()
    self.predictions = None
    Log.Info("Loading dataset", self.verbose)
    try:
        # Load train and test dataset.
        trainData = np.genfromtxt(self.dataset[0], delimiter=',')
        testData = np.genfromtxt(self.dataset[1], delimiter=',')
        # Labels are the last column of the training set.
        labels = MulticlassLabels(trainData[:, (trainData.shape[1] - 1)])
        with totalTimer:
            # Transform into features (Shogun expects column-major samples).
            trainFeat = RealFeatures(trainData[:, :-1].T)
            testFeat = RealFeatures(testData.T)
            # Create and train the classifier.
            self.model = self.BuildModel(trainFeat, labels, options)
            # Run Naive Bayes Classifier on the test dataset.
            self.predictions = self.model.apply_multiclass(testFeat).get_labels()
    except Exception as e:
        return [-1]
    time = totalTimer.ElapsedTime()
    if len(self.dataset) > 1:
        return [time, self.predictions]
    return [time]
def classifier_multiclassocas_modular(num_vec=10, num_class=3, distance=15, width=2.1, C=1, epsilon=1e-5, seed=1):
    """Train MulticlassOCAS on synthetic data where each class pair is
    linearly separable.

    Returns (predicted labels for the test set, trained classifier).
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import MulticlassOCAS
    from modshogun import Math_init_random

    # reproducible results
    random.seed(seed)
    Math_init_random(seed)

    # generate some training data where each class pair is linearly separable
    label_train = array([mod(x, num_class) for x in range(num_vec)], dtype="float64")
    label_test = array([mod(x, num_class) for x in range(num_vec)], dtype="float64")
    fm_train = array(random.randn(num_class, num_vec))
    fm_test = array(random.randn(num_class, num_vec))
    for i in range(len(label_train)):
        # Bug fix: labels are stored as float64 and modern numpy rejects
        # non-integer array indices — cast to int before row-indexing.
        fm_train[int(label_train[i]), i] += distance
        fm_test[int(label_test[i]), i] += distance

    feats_train = RealFeatures(fm_train)
    feats_test = RealFeatures(fm_test)
    labels = MulticlassLabels(label_train)

    classifier = MulticlassOCAS(C, feats_train, labels)
    classifier.train()

    out = classifier.apply(feats_test).get_labels()
    return out, classifier
def classifier_multiclassmachine_modular(fm_train_real=traindat, fm_test_real=testdat, label_train_multiclass=label_traindat, width=2.1, C=1, epsilon=1e-5):
    """One-vs-rest kernel multiclass machine built on LibSVM base learners."""
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import GaussianKernel
    from modshogun import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    kernel = GaussianKernel(train_feats, train_feats, width)
    train_labels = MulticlassLabels(label_train_multiclass)

    # base binary learner shared across the one-vs-rest sub-problems
    base = LibSVM()
    base.set_epsilon(epsilon)

    machine = KernelMulticlassMachine(MulticlassOneVsRestStrategy(), kernel, base, train_labels)
    machine.train()

    # point the kernel at (train, test) before predicting
    kernel.init(train_feats, test_feats)
    out = machine.apply().get_labels()
    return out
def RunRandomForestShogun(q):
    """Time a random-forest build/apply cycle; elapsed time (or -1) goes on
    queue `q`.

    NOTE(review): `self` and `options` are free variables — closure context;
    confirm against the enclosing benchmark class.
    """
    totalTimer = Timer()
    Log.Info("Loading dataset", self.verbose)
    trainData, labels = SplitTrainData(self.dataset)
    trainData = RealFeatures(trainData.T)
    labels = MulticlassLabels(labels)
    testData = RealFeatures(LoadDataset(self.dataset[1]).T)
    # Number of Trees ("-n <int>").
    n = re.search("-n (\d+)", options)
    # Number of attributes to be chosen randomly to select from ("-f <int>").
    f = re.search("-f (\d+)", options)
    # Defaults: 1 attribute, 10 trees, when the flags are absent.
    self.form = 1 if not f else int(f.group(1))
    self.numTrees = 10 if not n else int(n.group(1))
    try:
        with totalTimer:
            self.model = self.BuildModel(trainData, labels, options)
            # Run the Random Forest Classifier on the test dataset.
            self.model.apply_multiclass(testData).get_labels()
    except Exception as e:
        q.put(-1)
        return -1
    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
def classifier_multiclasslogisticregression_modular(fm_train_real=traindat, fm_test_real=testdat, label_train_multiclass=label_traindat, label_test_multiclass=label_testdat, z=1, epsilon=1e-5):
    """Train multiclass logistic regression (regularisation parameter z).

    Prints accuracy when test labels are supplied; returns predicted labels,
    or None when the solver is unavailable.
    """
    from modshogun import RealFeatures, MulticlassLabels
    try:
        from modshogun import MulticlassLogisticRegression
    except ImportError:
        print("recompile shogun with Eigen3 support")
        return

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    learner = MulticlassLogisticRegression(z, train_feats, train_labels)
    learner.train()

    predicted = learner.apply(test_feats)
    out = predicted.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        truth = MulticlassLabels(label_test_multiclass)
        acc = MulticlassAccuracy().evaluate(predicted, truth)
        print('Accuracy = %.4f' % acc)
    return out
def metric_lmnn_modular(train_fname=traindat, test_fname=testdat, label_train_fname=label_traindat, k=3):
    """Learn an LMNN metric, then classify the test set with KNN under it.

    Returns (trained LMNN object, predicted labels), or None if Shogun is
    missing.
    """
    try:
        from modshogun import RealFeatures, MulticlassLabels, LMNN, KNN, CSVFile
    except ImportError:
        return

    # wrap features and labels into Shogun objects
    train_feats = RealFeatures(CSVFile(train_fname))
    test_feats = RealFeatures(CSVFile(test_fname))
    train_labels = MulticlassLabels(CSVFile(label_train_fname))

    # learn the distance metric with Large Margin Nearest Neighbour
    lmnn = LMNN(train_feats, train_labels, k)
    lmnn.train()

    # classify using KNN with the learned distance
    knn = KNN(k, lmnn.get_distance(), train_labels)
    knn.train()
    output = knn.apply(test_feats).get_labels()
    return lmnn, output
def RunLinearRegressionShogun(q):
    """Time least-squares regression; elapsed time (or -1) goes on queue `q`.

    NOTE(review): `self` is a free variable — this function is expected to be
    a closure inside a benchmark method that provides it.
    """
    totalTimer = Timer()
    # Load input dataset.
    # If the dataset contains two files then the second file is the responses
    # file.
    try:
        Log.Info("Loading dataset", self.verbose)
        if len(self.dataset) == 2:
            testSet = np.genfromtxt(self.dataset[1], delimiter=',')
        # Use the last row of the training set as the responses.
        X, y = SplitTrainData(self.dataset)
        with totalTimer:
            # Perform linear regression.
            model = LeastSquaresRegression(RealFeatures(X.T), RegressionLabels(y))
            model.train()
            b = model.get_w()
            if len(self.dataset) == 2:
                # Bug fix: the trained estimator is named `model`; the old code
                # referenced an undefined `classifier` and always raised
                # NameError here (silently swallowed as a -1 result).
                pred = model.apply(RealFeatures(testSet.T))
                self.predictions = pred.get_labels()
    except Exception as e:
        q.put(-1)
        return -1
    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
def classifier_svmocas_modular(train_fname=traindat, test_fname=testdat, label_fname=label_traindat, C=0.9, epsilon=1e-5, num_threads=1):
    """Train a linear SVMOcas classifier (bias disabled) from CSV inputs.

    Returns (predictions, svm, label array), or None when SVMOcas is missing.
    """
    from modshogun import RealFeatures, BinaryLabels
    from modshogun import CSVFile
    try:
        from modshogun import SVMOcas
    except ImportError:
        print("SVMOcas not available")
        return

    train_feats = RealFeatures(CSVFile(train_fname))
    test_feats = RealFeatures(CSVFile(test_fname))
    train_labels = BinaryLabels(CSVFile(label_fname))

    svm = SVMOcas(C, train_feats, train_labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.set_bias_enabled(False)
    svm.train()

    # queried for parity with the original example; values are otherwise unused
    bias = svm.get_bias()
    w = svm.get_w()

    predictions = svm.apply(test_feats)
    return predictions, svm, predictions.get_labels()
def preprocessor_normone_modular(fm_train_real=traindat, fm_test_real=testdat, width=1.4, size_cache=10):
    """Normalise each feature vector to unit norm, then build Chi2 kernel
    matrices for train x train and train x test.
    """
    from modshogun import Chi2Kernel
    from modshogun import RealFeatures
    from modshogun import NormOne

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    norm_one = NormOne()
    norm_one.init(train_feats)
    # attach and apply the same preprocessor to both feature sets
    for feats in (train_feats, test_feats):
        feats.add_preprocessor(norm_one)
        feats.apply_preprocessor()

    kernel = Chi2Kernel(train_feats, train_feats, width, size_cache)
    km_train = kernel.get_kernel_matrix()
    kernel.init(train_feats, test_feats)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
def classifier_multiclasslinearmachine_modular(fm_train_real=traindat, fm_test_real=testdat, label_train_multiclass=label_traindat, label_test_multiclass=label_testdat, width=2.1, C=1, epsilon=1e-5):
    """One-vs-one linear multiclass machine built from LibLinear learners.

    Prints accuracy when test labels are supplied; returns predicted labels.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine, MulticlassOneVsOneStrategy, MulticlassOneVsRestStrategy

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    # base binary learner shared by all one-vs-one sub-problems
    base = LibLinear(L2R_L2LOSS_SVC)
    base.set_epsilon(epsilon)
    base.set_bias_enabled(True)

    machine = LinearMulticlassMachine(MulticlassOneVsOneStrategy(), train_feats, base, train_labels)
    machine.train()

    # NB: apply() with no argument predicts on the training features,
    # mirroring the original example.
    predicted = machine.apply()
    out = predicted.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        truth = MulticlassLabels(label_test_multiclass)
        acc = MulticlassAccuracy().evaluate(predicted, truth)
        print('Accuracy = %.4f' % acc)
    return out
def kernel_combined_modular(fm_train_real=traindat, fm_test_real=testdat, fm_train_dna=traindna, fm_test_dna=testdna):
    """Build a CombinedKernel from three sub-kernels (Gaussian on real
    features, fixed-degree and local-alignment string kernels on DNA).

    Returns (train kernel matrix, test kernel matrix, combined kernel).
    """
    from modshogun import CombinedKernel, GaussianKernel, FixedDegreeStringKernel, LocalAlignmentStringKernel
    from modshogun import RealFeatures, StringCharFeatures, CombinedFeatures, DNA

    combined_kernel = CombinedKernel()
    train_feats = CombinedFeatures()
    test_feats = CombinedFeatures()

    def register(sub_train, sub_test, sub_kernel):
        # Append one (train, test, kernel) triple to the combined objects.
        train_feats.append_feature_obj(sub_train)
        test_feats.append_feature_obj(sub_test)
        combined_kernel.append_kernel(sub_kernel)

    # real-valued features with a Gaussian kernel
    register(RealFeatures(fm_train_real), RealFeatures(fm_test_real), GaussianKernel(10, 1.1))

    # DNA strings with a fixed-degree string kernel
    degree = 3
    register(StringCharFeatures(fm_train_dna, DNA), StringCharFeatures(fm_test_dna, DNA), FixedDegreeStringKernel(10, degree))

    # DNA strings with a local-alignment string kernel
    register(StringCharFeatures(fm_train_dna, DNA), StringCharFeatures(fm_test_dna, DNA), LocalAlignmentStringKernel(10))

    combined_kernel.init(train_feats, train_feats)
    km_train = combined_kernel.get_kernel_matrix()
    combined_kernel.init(train_feats, test_feats)
    km_test = combined_kernel.get_kernel_matrix()
    return km_train, km_test, combined_kernel
def multiclass_randomforest_modular(train=traindat, test=testdat, labels=label_traindat, ft=feattypes):
    """Train a 20-tree RandomForest with majority voting; classify test data.

    Returns (trained forest, predicted labels), or None on import failure.
    """
    try:
        from modshogun import RealFeatures, MulticlassLabels, CSVFile, RandomForest, MajorityVote
    except ImportError:
        print("Could not import Shogun modules")
        return

    # wrap features and labels into Shogun objects
    train_feats = RealFeatures(CSVFile(train))
    test_feats = RealFeatures(CSVFile(test))
    train_labels = MulticlassLabels(CSVFile(labels))

    # Random Forest formation
    forest = RandomForest(train_feats, train_labels, 20, 1)
    forest.set_feature_types(ft)
    forest.set_combination_rule(MajorityVote())
    forest.train()

    # Classify test data
    output = forest.apply_multiclass(test_feats).get_labels()
    return forest, output
def RunNBCShogun(q):
    """Time GaussianNaiveBayes train+apply; put elapsed time (or -1) on `q`.

    NOTE(review): `self` is a free variable — closure context; confirm against
    the enclosing benchmark class.
    """
    totalTimer = Timer()
    Log.Info("Loading dataset", self.verbose)
    try:
        # Load train and test dataset.
        trainData = np.genfromtxt(self.dataset[0], delimiter=',')
        testData = np.genfromtxt(self.dataset[1], delimiter=',')
        # Labels are the last column of the training set.
        labels = MulticlassLabels(trainData[:, (trainData.shape[1] - 1)])
        with totalTimer:
            # Transform into features (Shogun expects column-major samples).
            trainFeat = RealFeatures(trainData[:, :-1].T)
            testFeat = RealFeatures(testData.T)
            # Create and train the classifier.
            nbc = GaussianNaiveBayes(trainFeat, labels)
            nbc.train()
            # Run Naive Bayes Classifier on the test dataset.
            nbc.apply(testFeat).get_labels()
    except Exception as e:
        q.put(-1)
        return -1
    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
def preprocessor_randomfouriergausspreproc_modular(fm_train_real=traindat, fm_test_real=testdat, width=1.4, size_cache=10):
    """Apply RandomFourierGaussPreproc to both feature sets, then build Chi2
    kernel matrices for train x train and train x test.
    """
    from modshogun import Chi2Kernel
    from modshogun import RealFeatures
    from modshogun import RandomFourierGaussPreproc

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    rf_preproc = RandomFourierGaussPreproc()
    rf_preproc.init(train_feats)
    # attach and apply the same preprocessor to both feature sets
    for feats in (train_feats, test_feats):
        feats.add_preprocessor(rf_preproc)
        feats.apply_preprocessor()

    kernel = Chi2Kernel(train_feats, train_feats, width, size_cache)
    km_train = kernel.get_kernel_matrix()
    kernel.init(train_feats, test_feats)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
def classifier_multiclass_relaxedtree(fm_train_real=traindat, fm_test_real=testdat, label_train_multiclass=label_traindat, label_test_multiclass=label_testdat, lawidth=2.1, C=1, epsilon=1e-5):
    """Train a RelaxedTree multiclass classifier; print test accuracy when
    test labels are supplied, and return the predicted labels.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import RelaxedTree, MulticlassLibLinear
    from modshogun import GaussianKernel

    train_feats = RealFeatures(fm_train_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    tree = RelaxedTree()
    # confusion-matrix machine guides the tree construction
    tree.set_machine_for_confusion_matrix(MulticlassLibLinear())
    tree.set_kernel(GaussianKernel())
    tree.set_labels(train_labels)
    tree.train(train_feats)

    predicted = tree.apply_multiclass(RealFeatures(fm_test_real))
    out = predicted.get_labels()

    if label_test_multiclass is not None:
        from modshogun import MulticlassAccuracy
        truth = MulticlassLabels(label_test_multiclass)
        acc = MulticlassAccuracy().evaluate(predicted, truth)
        print('Accuracy = %.4f' % acc)
    return out
def RunRandomForestShogun():
    """Time a random-forest build/apply; returns [time, predictions] or [-1].

    NOTE(review): `self` and `options` are free variables — closure context;
    confirm against the enclosing benchmark class.
    """
    totalTimer = Timer()
    Log.Info("Loading dataset", self.verbose)
    trainData, labels = SplitTrainData(self.dataset)
    trainData = RealFeatures(trainData.T)
    labels = MulticlassLabels(labels)
    testData = RealFeatures(LoadDataset(self.dataset[1]).T)
    # 'num_trees' is mandatory for this benchmark.
    if "num_trees" in options:
        self.numTrees = int(options.pop("num_trees"))
    else:
        Log.Fatal("Required parameter 'num_trees' not specified!")
        raise Exception("missing parameter")
    # 'dimensions' is optional; defaults to 1.
    self.form = 1
    if "dimensions" in options:
        self.form = int(options.pop("dimensions"))
    # Anything left over is unrecognised.
    if len(options) > 0:
        Log.Fatal("Unknown parameters: " + str(options))
        raise Exception("unknown parameters")
    try:
        with totalTimer:
            self.model = self.BuildModel(trainData, labels, options)
            # Run the Random Forest Classifier on the test dataset.
            self.predictions = self.model.apply_multiclass(testData).get_labels()
    except Exception as e:
        return [-1]
    time = totalTimer.ElapsedTime()
    return [time, self.predictions]
def predict(self, image):
    """Return the label of the training face closest to `image`.

    The image is flattened into a column vector, projected into the PCA
    subspace, and compared against every stored projection by Euclidean
    distance; the nearest neighbour's label wins (-1 if there are none).
    """
    # flatten the image into a single column vector
    imageAsRow = np.asarray(image.reshape(image.shape[0] * image.shape[1], 1), np.float64)
    # project into the PCA subspace
    p = self.pca.apply_to_feature_vector(RealFeatures(imageAsRow).get_feature_vector(0))

    minDist = 1e100  # smallest distance seen so far
    minClass = -1    # label of the best match so far

    # linear scan over all stored projections
    for idx in range(len(self._projections)):
        probe = RealFeatures(np.asmatrix(p, np.float64).T)
        candidate = RealFeatures(np.asmatrix(self._projections[idx], np.float64).T)
        dist = EuclideanDistance(probe, candidate).distance(0, 0)
        if dist < minDist:
            minDist = dist
            minClass = self._labels[idx]
    return minClass
def regression_cartree_modular(num_train=500, num_test=50, x_range=15, noise_var=0.2, ft=feattypes):
    """Fit a CART regression tree to noisy y=x data; predict on a test grid.

    Returns (trained tree, predicted labels), or None on import failure.
    """
    try:
        from modshogun import RealFeatures, RegressionLabels, CSVFile, CARTree, PT_REGRESSION
        from numpy import random
    except ImportError:
        print("Could not import Shogun and/or numpy modules")
        return

    random.seed(1)

    # training data: y = x plus Gaussian noise
    X_train = random.rand(1, num_train) * x_range
    Y_train = X_train + random.randn(num_train) * noise_var

    # test data: an evenly spaced grid over [0, x_range)
    X_test = array([[float(i) / num_test * x_range for i in range(num_test)]])

    # wrap features and labels into Shogun objects
    train_feats = RealFeatures(X_train)
    test_feats = RealFeatures(X_test)
    train_labels = RegressionLabels(Y_train[0])

    # CART tree for regression; 5 and True configure pruning (see CARTree docs)
    tree = CARTree(ft, PT_REGRESSION, 5, True)
    tree.set_labels(train_labels)
    tree.train(train_feats)

    output = tree.apply_regression(test_feats).get_labels()
    return tree, output
def features_dense_zero_copy_modular(in_data=data):
    """Demonstrate zero-copy sharing of a numpy buffer with RealFeatures.

    NOTE(review): the version guard compares strings lexicographically, so
    e.g. '1.10' < '1.5' — verify against the supported numpy versions.
    """
    feats = None
    if numpy.__version__ >= '1.5':
        # Fortran (column-major) order, which is the layout Shogun consumes.
        feats = numpy.array(in_data, dtype=float64, order='F')
        a = RealFeatures()
        a.frombuffer(feats, False)  # False: share the buffer, do not copy
        b = numpy.array(a, copy=False)  # view on the same memory as `a`
        c = numpy.array(a, copy=True)   # independent copy
        d = RealFeatures()
        d.frombuffer(a, False)  # shares a's buffer
        e = RealFeatures()
        e.frombuffer(a, True)   # copies a's buffer
        # mutate through `a`: views (b, d) observe the change, copies (c, e) do not
        a[:, 0] = 0
    else:
        print("numpy version >= 1.5 is needed")
    return feats
def RunLinearRidgeRegressionShogun(q): totalTimer = Timer() # Load input dataset. # If the dataset contains two files then the second file is the responses # file. Log.Info("Loading dataset", self.verbose) if len(self.dataset) >= 2: testSet = np.genfromtxt(self.dataset[1], delimiter=',') # Use the last row of the training set as the responses. X, y = SplitTrainData(self.dataset) tau = re.search("-t (\d+)", options) tau = 1.0 if not tau else int(tau.group(1)) try: with totalTimer: # Perform linear ridge regression. model = LRR(tau, RealFeatures(X.T), RegressionLabels(y)) model.train() if len(self.dataset) >= 2: model.apply_regression(RealFeatures(testSet.T)) except Exception as e: q.put(-1) return -1 time = totalTimer.ElapsedTime() q.put(time) return time
def RunMetrics(self, options):
    """Run ridge regression and collect runtime plus Simple MSE metrics.

    Returns the metrics dict, the negative error code from the timing run, or
    None (after a fatal log) when fewer than three datasets are available.
    """
    Log.Info("Perform Linear Ridge Regression.", self.verbose)
    results = self.LinearRidgeRegressionShogun(options)
    if results < 0:
        return results
    metrics = {'Runtime' : results}
    # Simple MSE needs train, test and true-label datasets.
    if len(self.dataset) >= 3:
        X, y = SplitTrainData(self.dataset)
        # Regularisation parameter tau from "-t <int>"; defaults to 1.0.
        tau = re.search("-t (\d+)", options)
        tau = 1.0 if not tau else int(tau.group(1))
        # Re-train a model purely for metric computation (the timing run above
        # does not expose its predictions).
        model = LRR(tau, RealFeatures(X.T), RegressionLabels(y))
        model.train()
        testData = LoadDataset(self.dataset[1])
        truelabels = LoadDataset(self.dataset[2])
        predictedlabels = model.apply_regression(RealFeatures(testData.T)).get_labels()
        SimpleMSE = Metrics.SimpleMeanSquaredError(truelabels, predictedlabels)
        metrics['Simple MSE'] = SimpleMSE
        return metrics
    else:
        Log.Fatal("This method requires three datasets!")
def RunKMeansShogun(q):
    """Time a K-Means clustering run; puts elapsed time (or -1) on queue `q`.

    NOTE(review): `self` and `options` are free variables — closure context;
    confirm against the enclosing benchmark class.
    """
    totalTimer = Timer()
    # Load input dataset.
    # If the dataset contains two files then the second file is the centroids
    # file.
    Log.Info("Loading dataset", self.verbose)
    if len(self.dataset) == 2:
        data = np.genfromtxt(self.dataset[0], delimiter=',')
        centroids = np.genfromtxt(self.dataset[1], delimiter=',')
    else:
        data = np.genfromtxt(self.dataset[0], delimiter=',')

    # Gather parameters.
    clusters = re.search("-c (\d+)", options)
    maxIterations = re.search("-m (\d+)", options)
    seed = re.search("-s (\d+)", options)

    # Now do validation of options.
    if not clusters and len(self.dataset) != 2:
        Log.Fatal("Required option: Number of clusters or cluster locations.")
        q.put(-1)
        return -1
    elif (not clusters or int(clusters.group(1)) < 1) and len(self.dataset) != 2:
        Log.Fatal("Invalid number of clusters requested! Must be greater than" + " or equal to 1.")
        q.put(-1)
        return -1

    m = 1000 if not maxIterations else int(maxIterations.group(1))
    if seed:
        # Bug fix: regex groups are strings; convert to int before seeding
        # Shogun's RNG.
        Math_init_random(int(seed.group(1)))
    try:
        dataFeat = RealFeatures(data.T)
        distance = EuclideanDistance(dataFeat, dataFeat)
        # Create the K-Means object and perform K-Means clustering.
        with totalTimer:
            if len(self.dataset) == 2:
                model = KMeans(int(clusters.group(1)), distance, RealFeatures(centroids))
            else:
                model = KMeans(int(clusters.group(1)), distance)
            model.set_mbKMeans_iter(m)
            model.train()
            labels = model.apply().get_labels()
            centers = model.get_cluster_centers()
    except Exception as e:
        print(e)
        q.put(-1)
        return -1
    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
def regression_gaussian_process_modular(n=100, n_test=100, x_range=6, x_range_test=10, noise_var=0.5, width=1, seed=1):
    """Gaussian-process regression demo on a noisy 1-D sine wave.

    Returns (alpha, diagonal, rounded variance, rounded mean, cholesky), or
    None when the GP classes are unavailable.
    """
    from modshogun import RealFeatures, RegressionLabels, GaussianKernel, Math
    try:
        from modshogun import GaussianLikelihood, ZeroMean, ExactInferenceMethod, GaussianProcessRegression
    except ImportError:
        print("Eigen3 needed for Gaussian Processes")
        return

    # reproducable results
    random.seed(seed)
    # NOTE(review): Shogun's RNG is seeded with a hard-coded 17, so `seed`
    # only affects numpy — confirm whether this is intentional.
    Math.init_random(17)

    # easy regression data: one dimensional noisy sine wave
    X = random.rand(1, n) * x_range
    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y_test = sin(X_test)
    Y = sin(X) + random.randn(n) * noise_var

    # shogun representation
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    # GP specification: Shogun's GaussianKernel takes 2*width^2 as its parameter
    shogun_width = width * width * 2
    kernel = GaussianKernel(10, shogun_width)
    zmean = ZeroMean()
    lik = GaussianLikelihood()
    lik.set_sigma(noise_var)
    inf = ExactInferenceMethod(kernel, feats_train, zmean, labels, lik)

    # train GP
    gp = GaussianProcessRegression(inf)
    gp.train()

    # some things we can do
    alpha = inf.get_alpha()
    diagonal = inf.get_diagonal_vector()
    cholesky = inf.get_cholesky()

    # get mean and variance vectors
    mean = gp.get_mean_vector(feats_test)
    variance = gp.get_variance_vector(feats_test)

    # (matplotlib plotting of predictions and the 95% band omitted)
    # NOTE(review): builtin round() on `variance`/`mean` only works if these
    # are scalars or numpy's round is in scope via a star-import — verify.
    return alpha, diagonal, round(variance, 12), round(mean, 12), cholesky
def RunAllKnnShogun(q):
    """Time an all-k-nearest-neighbours run; elapsed time (or -1) on queue `q`.

    NOTE(review): `self` and `options` are free variables — closure context;
    confirm against the enclosing benchmark class.
    """
    totalTimer = Timer()
    # Load input dataset.
    # If the dataset contains two files then the second file is the query
    # file.
    try:
        Log.Info("Loading dataset", self.verbose)
        if len(self.dataset) == 2:
            referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
            queryData = np.genfromtxt(self.dataset[1], delimiter=',')
            # Bug fix: wrap the loaded query data; the old code wrapped the
            # not-yet-defined `queryFeat` and always raised NameError here.
            queryFeat = RealFeatures(queryData.T)
        else:
            referenceData = np.genfromtxt(self.dataset, delimiter=',')
        # Labels are the last column of the dataset.
        labels = MulticlassLabels(referenceData[:, (referenceData.shape[1] - 1)])
        referenceData = referenceData[:, :-1]
        with totalTimer:
            # Get all the parameters.
            k = re.search("-k (\d+)", options)
            if not k:
                Log.Fatal("Required option: Number of furthest neighbors to find.")
                q.put(-1)
                return -1
            else:
                k = int(k.group(1))
                if k < 1 or k > referenceData.shape[0]:
                    # Bug fix: `k` was rebound to an int above, so the old
                    # `k.group(1)` raised AttributeError — use str(k).
                    Log.Fatal("Invalid k: " + str(k) + "; must be greater than 0" + " and less or equal than " + str(referenceData.shape[0]))
                    q.put(-1)
                    return -1
            referenceFeat = RealFeatures(referenceData.T)
            distance = EuclideanDistance(referenceFeat, referenceFeat)
            # Perform All K-Nearest-Neighbors.
            model = SKNN(k, distance, labels)
            model.train()
            if len(self.dataset) == 2:
                out = model.apply(queryFeat).get_labels()
            else:
                out = model.apply(referenceFeat).get_labels()
    except Exception as e:
        q.put(-1)
        return -1
    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
def RunLDAShogun():
    """Time MCLDA training/prediction; returns the elapsed time or -1.

    Labels are remapped onto {0,...,num_classes-1} (a requirement of Shogun's
    MCLDA) and predictions are mapped back before being stored.

    NOTE(review): `self` and `options` are free variables — closure context;
    confirm against the enclosing benchmark class.
    """
    totalTimer = Timer()
    # Load input dataset.
    # If the dataset contains two files then the second file is the test file.
    try:
        if len(self.dataset) > 1:
            testSet = LoadDataset(self.dataset[1])

        # Use the last row of the training set as the responses.
        trainSet, trainLabels = SplitTrainData(self.dataset)

        # if the labels are not in {0,1,2,...,num_classes-1}, map them to this
        # set and store the mapping; remember the inverse so predictions can
        # be translated back.
        distinctLabels = list(set(trainLabels))
        mapping = {}
        reverseMapping = {}
        idx = 0
        for label in distinctLabels:
            mapping[label] = idx
            reverseMapping[idx] = label
            idx += 1
        for i in range(len(trainLabels)):
            trainLabels[i] = mapping[trainLabels[i]]

        trainFeat = RealFeatures(trainSet.T)
        trainLabels = MulticlassLabels(trainLabels)

        # Gather optional parameters.
        if "tolerance" in options:
            self.tolerance = float(options.pop("tolerance"))
        if "store" in options:
            self.store = bool(options.pop("store"))
        if len(options) > 0:
            Log.Fatal("Unknown parameters: " + str(options))
            raise Exception("unknown parameters")

        with totalTimer:
            self.model = MCLDA(trainFeat, trainLabels, self.tolerance, self.store)
            self.model.train()
            # Bug fix: only predict when a test set exists (len > 1); the old
            # `len(self.dataset) > 0` guard referenced `testSet` before
            # assignment whenever only the training file was supplied.
            if len(self.dataset) > 1:
                self.predictions = self.model.apply_multiclass(RealFeatures(testSet.T))
                self.predictions = self.predictions.get_labels()
                # reverse map the predicted labels to actual labels
                for i in range(len(self.predictions)):
                    self.predictions[i] = reverseMapping[self.predictions[i]]
    except Exception as e:
        Log.Info("Exception: " + str(e))
        return -1
    time = totalTimer.ElapsedTime()
    return time