def evaluate4svm(labels, feats, params={'c': 1, 'kernal': 'gauss'}, Nsplit=2):
    """
    Run n-fold cross-validation to evaluate a GMNPSVM.

    For every fold the machine is trained on the complement of the fold and
    its predictions on the fold itself are scored with MulticlassAccuracy.

    Parameters
    ----------
    labels: labels object
        Data set labels. Must provide ``add_subset``, ``remove_subset`` and
        ``get_num_labels``; it is handed to CrossValidationSplitting and
        GMNPSVM.
    feats: features object
        Data set features. Must provide ``add_subset`` and ``remove_subset``.
    params: dictionary
        Search scope parameters. ``'c'`` is the SVM regularisation constant
        (1 when absent). ``'kernal'`` (spelling is part of the interface)
        selects the kernel: ``'gauss'``, ``'sigmoid'``, anything else gives a
        linear kernel. The dictionary is only read, never modified.
    Nsplit: int, default = 2
        The n for n-fold cross validation.

    Returns
    -------
    accuracy: numpy array of length Nsplit
        Accuracy measured on each held-out fold.
    """
    # time.clock() no longer exists on Python >= 3.8; default_timer is
    # available on both Python 2 and 3.
    from timeit import default_timer

    c = params.get('c', 1)

    # Compare the looked-up value with the kernel name. The previous form
    # `params.get('kernal' == 'gauss')` looked up the key `False` and thus
    # always fell through to the linear kernel.
    kernel_name = params.get('kernal')
    if kernel_name == 'gauss':
        kernal = GaussianKernel()
        kernal.set_width(80)
    elif kernel_name == 'sigmoid':
        kernal = SigmoidKernel()
    else:
        kernal = LinearKernel()

    split = CrossValidationSplitting(labels, Nsplit)
    split.build_subsets()

    accuracy = np.zeros(Nsplit)
    # Per-sample prediction time of each fold. Recorded for inspection only;
    # it is not returned so the return value stays what callers expect.
    time_test = np.zeros(accuracy.shape)
    evaluator = MulticlassAccuracy()

    for i in range(Nsplit):
        idx_train = split.generate_subset_inverse(i)
        idx_test = split.generate_subset_indices(i)

        # --- train on everything except fold i ---
        feats.add_subset(idx_train)
        labels.add_subset(idx_train)
        # Single-argument form prints the same text on Python 2 and 3.
        print("%s %s %s" % (c, kernal, labels))

        svm = GMNPSVM(c, kernal, labels)
        svm.train(feats)
        feats.remove_subset()
        labels.remove_subset()

        # --- predict and score on fold i ---
        # The original referenced undefined `feats_test` / `labels_test`;
        # the held-out view of `feats` / `labels` is used instead.
        feats.add_subset(idx_test)
        labels.add_subset(idx_test)

        t_start = default_timer()
        out = svm.apply(feats)
        time_test[i] = (default_timer() - t_start) / labels.get_num_labels()

        accuracy[i] = evaluator.evaluate(out, labels)
        feats.remove_subset()
        labels.remove_subset()

    return accuracy
def classifier_gmnpsvm_modular(train_fname=traindat, test_fname=testdat,
                               label_fname=label_traindat, width=2.1, C=1,
                               epsilon=1e-5):
    """
    Train a GMNPSVM with a Gaussian kernel and predict labels for a test set.

    Parameters
    ----------
    train_fname:
        CSV file with the training features.
    test_fname:
        CSV file with the features to classify.
    label_fname:
        CSV file with the training labels.
    width: float, default = 2.1
        Width passed to the Gaussian kernel.
    C: default = 1
        First constructor argument of GMNPSVM.
    epsilon: float, default = 1e-5
        Value passed to ``set_epsilon`` on the machine.

    Returns
    -------
    tuple
        ``(predicted, kernel)``: the result of ``get_labels()`` on the
        machine's output for the test features, and the kernel object.
    """
    from modshogun import CSVFile, GMNPSVM, GaussianKernel
    from modshogun import MulticlassLabels, RealFeatures

    # Inputs are read in the order: training features, test features, labels.
    training_features = RealFeatures(CSVFile(train_fname))
    testing_features = RealFeatures(CSVFile(test_fname))
    training_labels = MulticlassLabels(CSVFile(label_fname))

    # The kernel is built over the training features on both sides.
    kernel = GaussianKernel(training_features, training_features, width)

    machine = GMNPSVM(C, kernel, training_labels)
    machine.set_epsilon(epsilon)
    machine.train(training_features)

    predicted = machine.apply(testing_features).get_labels()
    return predicted, kernel
def classifier_gmnpsvm_modular(train_fname=traindat, test_fname=testdat,
                               label_fname=label_traindat, width=2.1, C=1,
                               epsilon=1e-5):
    """
    Fit a Gaussian-kernel GMNPSVM on CSV data and classify a CSV test set.

    Parameters
    ----------
    train_fname:
        Path of the CSV file holding the training features.
    test_fname:
        Path of the CSV file holding the test features.
    label_fname:
        Path of the CSV file holding the training labels.
    width: float, default = 2.1
        Gaussian kernel width.
    C: default = 1
        Constant handed to the GMNPSVM constructor.
    epsilon: float, default = 1e-5
        Argument of the machine's ``set_epsilon`` call.

    Returns
    -------
    tuple
        The labels obtained from ``apply(...).get_labels()`` on the test
        features, followed by the kernel used for training.
    """
    from modshogun import (CSVFile, GMNPSVM, GaussianKernel,
                           MulticlassLabels, RealFeatures)

    train_feats = RealFeatures(CSVFile(train_fname))
    test_feats = RealFeatures(CSVFile(test_fname))
    train_labels = MulticlassLabels(CSVFile(label_fname))

    gauss = GaussianKernel(train_feats, train_feats, width)

    classifier = GMNPSVM(C, gauss, train_labels)
    classifier.set_epsilon(epsilon)
    classifier.train(train_feats)

    # Apply first, then extract the raw label vector from the result.
    prediction = classifier.apply(test_feats)
    return prediction.get_labels(), gauss
class Ai:
    """GMNPSVM-based classifier: loads data, trains, persists and predicts.

    Feature matrices hold one sample per column (``enable_validation``
    selects samples with ``x[:, idx]``). Labels are stored shifted by -1
    relative to the label file and shifted back in ``classify``.
    """

    def __init__(self):
        """Start with no data loaded and no machine."""
        self.x = None
        self.y = None
        self.x_test = None
        self.y_test = None
        self.svm = None

    def load_train_data(self, x_fname, y_fname):
        """Load features and labels from text files, dropping previous state.

        Until ``enable_validation`` is called the test set is the training
        set itself.
        """
        Ai.__init__(self)
        self.x = np.loadtxt(x_fname)
        self.y = np.loadtxt(y_fname) - 1.0
        self.x_test = self.x
        self.y_test = self.y

    def _svm_new(self, kernel_width, c, epsilon):
        """Create a fresh GMNPSVM with a Gaussian kernel over the loaded data.

        Raises:
            Exception: if no training data has been loaded.
        """
        # Identity test on purpose: `== None` on a NumPy array compares
        # element-wise, and the resulting array has no single truth value.
        if self.x is None or self.y is None:
            raise Exception("No training data loaded.")
        x = RealFeatures(self.x)
        # NOTE(review): later Shogun releases replaced the generic `Labels`
        # class with `MulticlassLabels` for multiclass machines - confirm
        # against the installed version.
        y = Labels(self.y)
        self.svm = GMNPSVM(c, GaussianKernel(x, x, kernel_width), y)
        self.svm.set_epsilon(epsilon)

    def write_svm(self):
        """Pickle the machine into the gzip file com.TRAIN_SVM_FNAME_GZ."""
        gz_stream = gz.open(com.TRAIN_SVM_FNAME_GZ, 'wb', 9)
        try:
            pkl.dump(self.svm, gz_stream)
        finally:
            # Close the stream even when pickling fails.
            gz_stream.close()

    def read_svm(self):
        """Restore the machine from the gzip file com.TRAIN_SVM_FNAME_GZ."""
        # NOTE(review): `pkl` is presumably pickle/cPickle; unpickling runs
        # code from the file, so only load archives this program wrote.
        gz_stream = gz.open(com.TRAIN_SVM_FNAME_GZ, 'rb')
        try:
            self.svm = pkl.load(gz_stream)
        finally:
            gz_stream.close()

    def enable_validation(self, train_frac):
        """Randomly split the loaded data into a training and a test part.

        The first ``floor(train_frac * n)`` shuffled samples become the
        training set and the samples from ``ceil(train_frac * n)`` onwards
        the test set. When ``train_frac * n`` is not a whole number the one
        sample in between is used in neither part.
        """
        x = self.x
        y = self.y
        idx = np.arange(len(y))
        np.random.shuffle(idx)

        # np.floor / np.ceil return floats, which are not valid slice bounds.
        cut = train_frac * len(y)
        train_idx = idx[:int(np.floor(cut))]
        test_idx = idx[int(np.ceil(cut)):]

        self.x = x[:, train_idx]
        self.y = y[train_idx]
        self.x_test = x[:, test_idx]
        self.y_test = y[test_idx]

    def train(self, kernel_width, c, epsilon):
        """Build a new machine and train it on the current training data."""
        self._svm_new(kernel_width, c, epsilon)
        x = RealFeatures(self.x)
        self.svm.io.enable_progress()
        try:
            self.svm.train(x)
        finally:
            # Do not leave progress output switched on after a failure.
            self.svm.io.disable_progress()

    def load_classifier(self):
        """Load a previously stored machine (see ``read_svm``)."""
        self.read_svm()

    def classify(self, matrix):
        """Classify one sample and return its label as an int in 0..9.

        ``matrix`` is reshaped column-major into a single feature column of
        length com.FEATURE_DIM. The +1 undoes the shift applied to the labels
        in ``load_train_data``; the modulo maps a label of 10 to 0.
        """
        # Shape passed positionally: the `newshape=` keyword is deprecated in
        # recent NumPy releases.
        column = np.reshape(matrix, (com.FEATURE_DIM, 1), order='F')
        cl = self.svm.apply(RealFeatures(column)).get_label(0)
        return int(cl + 1.0) % 10

    def get_test_error(self):
        """Return the fraction of test samples the machine misclassifies."""
        self.svm.io.enable_progress()
        try:
            predicted = self.svm.apply(RealFeatures(self.x_test)).get_labels()
        finally:
            self.svm.io.disable_progress()
        return 1.0 - np.mean(predicted == self.y_test)
class Ai:
    """GMNPSVM-based classifier: loads data, trains, persists and predicts.

    Feature matrices hold one sample per column (``enable_validation``
    selects samples with ``x[:, idx]``). Labels are stored shifted by -1
    relative to the label file and shifted back in ``classify``.
    """

    def __init__(self):
        """Start with no data loaded and no machine."""
        self.x = None
        self.y = None
        self.x_test = None
        self.y_test = None
        self.svm = None

    def load_train_data(self, x_fname, y_fname):
        """Load features and labels from text files, dropping previous state.

        Until ``enable_validation`` is called the test set is the training
        set itself.
        """
        Ai.__init__(self)
        self.x = np.loadtxt(x_fname)
        self.y = np.loadtxt(y_fname) - 1.0
        self.x_test = self.x
        self.y_test = self.y

    def _svm_new(self, kernel_width, c, epsilon):
        """Create a fresh GMNPSVM with a Gaussian kernel over the loaded data.

        Raises:
            Exception: if no training data has been loaded.
        """
        # Identity test on purpose: `== None` on a NumPy array compares
        # element-wise, and the resulting array has no single truth value.
        if self.x is None or self.y is None:
            raise Exception("No training data loaded.")
        x = RealFeatures(self.x)
        y = MulticlassLabels(self.y)
        self.svm = GMNPSVM(c, GaussianKernel(x, x, kernel_width), y)
        self.svm.set_epsilon(epsilon)

    def write_svm(self):
        """Pickle the machine into the gzip file com.TRAIN_SVM_FNAME_GZ."""
        gz_stream = gz.open(com.TRAIN_SVM_FNAME_GZ, 'wb', 9)
        try:
            pkl.dump(self.svm, gz_stream)
        finally:
            # Close the stream even when pickling fails.
            gz_stream.close()

    def read_svm(self):
        """Restore the machine from the gzip file com.TRAIN_SVM_FNAME_GZ."""
        # NOTE(review): `pkl` is presumably pickle/cPickle; unpickling runs
        # code from the file, so only load archives this program wrote.
        gz_stream = gz.open(com.TRAIN_SVM_FNAME_GZ, 'rb')
        try:
            self.svm = pkl.load(gz_stream)
        finally:
            gz_stream.close()

    def enable_validation(self, train_frac):
        """Randomly split the loaded data into a training and a test part.

        The first ``floor(train_frac * n)`` shuffled samples become the
        training set and the samples from ``ceil(train_frac * n)`` onwards
        the test set. When ``train_frac * n`` is not a whole number the one
        sample in between is used in neither part.
        """
        x = self.x
        y = self.y
        idx = np.arange(len(y))
        np.random.shuffle(idx)

        # np.floor / np.ceil return floats, which are not valid slice bounds.
        cut = train_frac * len(y)
        train_idx = idx[:int(np.floor(cut))]
        test_idx = idx[int(np.ceil(cut)):]

        self.x = x[:, train_idx]
        self.y = y[train_idx]
        self.x_test = x[:, test_idx]
        self.y_test = y[test_idx]

    def train(self, kernel_width, c, epsilon):
        """Build a new machine and train it on the current training data."""
        self._svm_new(kernel_width, c, epsilon)
        x = RealFeatures(self.x)
        self.svm.io.enable_progress()
        try:
            self.svm.train(x)
        finally:
            # Do not leave progress output switched on after a failure.
            self.svm.io.disable_progress()

    def load_classifier(self):
        """Load a previously stored machine (see ``read_svm``)."""
        self.read_svm()

    def classify(self, matrix):
        """Classify one sample and return its label as an int in 0..9.

        ``matrix`` is reshaped column-major into a single feature column of
        length com.FEATURE_DIM. The +1 undoes the shift applied to the labels
        in ``load_train_data``; the modulo maps a label of 10 to 0.
        """
        # Shape passed positionally: the `newshape=` keyword is deprecated in
        # recent NumPy releases.
        column = np.reshape(matrix, (com.FEATURE_DIM, 1), order='F')
        cl = self.svm.apply(RealFeatures(column)).get_label(0)
        return int(cl + 1.0) % 10

    def get_test_error(self):
        """Return the fraction of test samples the machine misclassifies."""
        self.svm.io.enable_progress()
        try:
            predicted = self.svm.apply(RealFeatures(self.x_test)).get_labels()
        finally:
            self.svm.io.disable_progress()
        return 1.0 - np.mean(predicted == self.y_test)